From 9e02ab4728d5b94b1a4848cb20063d087d38d428 Mon Sep 17 00:00:00 2001 From: Gabriel Bianconi <1275491+GabrielBianconi@users.noreply.github.com> Date: Thu, 29 Jan 2026 18:50:07 -0500 Subject: [PATCH 1/2] Support tensorzero_reasoning_content for OpenAI-compatible endpoint --- .../types/chat_completions.rs | 338 +++++++++++++++++- .../openai_compatible/types/streaming.rs | 202 ++++++++++- 2 files changed, 510 insertions(+), 30 deletions(-) diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs index 4b15e42cb1..065c98c232 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs @@ -34,7 +34,7 @@ use crate::inference::types::extra_headers::UnfilteredInferenceExtraHeaders; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ Arguments, ContentBlockChatOutput, FinishReason, Input, InputMessage, InputMessageContent, - RawText, Role, System, Template, Text, current_timestamp, + RawText, Role, System, Template, Text, Thought, current_timestamp, }; use crate::tool::{DynamicToolParams, ProviderTool, ToolResult}; use crate::variant::JsonMode; @@ -57,6 +57,8 @@ pub struct OpenAICompatibleUserMessage { pub struct OpenAICompatibleAssistantMessage { pub content: Option, pub tool_calls: Option>, + #[serde(default)] + pub tensorzero_reasoning_content: Option>, } #[derive(Clone, Debug, PartialEq, TensorZeroDeserialize)] @@ -170,8 +172,11 @@ pub struct OpenAICompatibleParams { #[derive(Clone, Debug, PartialEq, Serialize)] pub struct OpenAICompatibleResponseMessage { pub content: Option, + #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, pub role: String, + #[serde(skip_serializing_if = "is_none_or_empty")] + pub tensorzero_reasoning_content: Option>, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -224,6 +229,28 @@ pub struct OpenAICompatibleResponse { pub tensorzero_raw_response: Option>, } +// Signature dictated by Serde +#[expect(clippy::ref_option)] +fn is_none_or_empty(v: &Option>) -> bool { + v.as_ref().is_none_or(Vec::is_empty) +} + +/// OpenAI-compatible Thought for responses (with index for position in content array) +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct OpenAICompatibleThought { + pub index: usize, + #[serde(flatten)] + pub thought: Thought, +} + +/// OpenAI-compatible Thought for input (with optional index) +#[derive(Clone, Debug, Deserialize, PartialEq)] +pub struct OpenAICompatibleInputThought { + pub index: Option, + #[serde(flatten)] + pub thought: Thought, +} + // ============================================================================ // Content Block Types // ============================================================================ @@ -552,6 +579,31 @@ pub fn openai_messages_to_input( message_content.push(InputMessageContent::ToolCall(tool_call.into())); } } + // Process reasoning content with index-based insertion + if let Some(thoughts) = msg.tensorzero_reasoning_content { + // Collect indexed thoughts (with explicit index) and unindexed thoughts separately + let mut indexed: Vec<(usize, Thought)> = Vec::new(); + let mut unindexed: Vec = Vec::new(); + for input_thought in thoughts { + match input_thought.index { + Some(idx) => indexed.push((idx, input_thought.thought)), + None => unindexed.push(input_thought.thought), + } + } + + // First, insert thoughts with explicit 
indices at their specified positions + // Sort by index to handle insertions correctly + indexed.sort_by_key(|(idx, _)| *idx); + for (idx, thought) in indexed { + let idx = idx.min(message_content.len()); + message_content.insert(idx, InputMessageContent::Thought(thought)); + } + + // Then, prepend all thoughts without an index at position 0 (reverse to maintain order) + for thought in unindexed.into_iter().rev() { + message_content.insert(0, InputMessageContent::Thought(thought)); + } + } messages.push(InputMessage { role: Role::Assistant, content: message_content, @@ -689,7 +741,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse ) -> Self { match inference_response { InferenceResponse::Chat(response) => { - let (content, tool_calls) = process_chat_content(response.content); + let (content, tool_calls, thoughts) = process_chat_content(response.content); let tensorzero_original_response = if include_original_response { response.original_response } else { @@ -708,8 +760,17 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse finish_reason: response.finish_reason.unwrap_or(FinishReason::Stop).into(), message: OpenAICompatibleResponseMessage { content, - tool_calls: Some(tool_calls), + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, role: "assistant".to_string(), + tensorzero_reasoning_content: if thoughts.is_empty() { + None + } else { + Some(thoughts) + }, }, }], created: current_timestamp() as u32, @@ -745,6 +806,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse content: response.output.raw, tool_calls: None, role: "assistant".to_string(), + tensorzero_reasoning_content: None, }, }], created: current_timestamp() as u32, @@ -763,14 +825,20 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse } } -/// Takes a vector of ContentBlockOutput and returns a tuple of (Option, Vec). -/// This is useful since the OpenAI format separates text and tool calls in the response fields. +/// Takes a vector of ContentBlockOutput and returns a tuple of +/// (Option, Vec, Vec). +/// This is useful since the OpenAI format separates text, tool calls, and reasoning in the response fields. 
pub fn process_chat_content( content: Vec, -) -> (Option, Vec) { +) -> ( + Option, + Vec, + Vec, +) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - for block in content { + let mut thoughts = Vec::new(); + for (index, block) in content.into_iter().enumerate() { match block { ContentBlockChatOutput::Text(text) => match content_str { Some(ref mut content) => content.push_str(&text.text), @@ -779,12 +847,8 @@ pub fn process_chat_content( ContentBlockChatOutput::ToolCall(tool_call) => { tool_calls.push(tool_call.into()); } - ContentBlockChatOutput::Thought(_thought) => { - // OpenAI compatible endpoint does not support thought blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'thought' content block when constructing OpenAI-compatible response" - ); + ContentBlockChatOutput::Thought(thought) => { + thoughts.push(OpenAICompatibleThought { index, thought }); } ContentBlockChatOutput::Unknown(_) => { tracing::warn!( @@ -793,7 +857,7 @@ pub fn process_chat_content( } } } - (content_str, tool_calls) + (content_str, tool_calls, thoughts) } #[cfg(test)] @@ -862,6 +926,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -873,6 +938,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -884,6 +950,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::Tool(OpenAICompatibleToolMessage { content: Some(Value::String("Tool result 1".to_string())), @@ -1067,6 +1134,7 @@ mod tests { "city": "Tokyo", }])), tool_calls: None, + tensorzero_reasoning_content: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1100,6 +1168,7 @@ mod tests { arguments: "{}".to_string(), }, }]), + tensorzero_reasoning_content: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1133,6 +1202,7 @@ mod tests { OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: Some(Value::String("Assistant message".to_string())), tool_calls: None, + tensorzero_reasoning_content: None, }), OpenAICompatibleMessage::System(OpenAICompatibleSystemMessage { content: Value::String("System message".to_string()), @@ -1495,16 +1565,19 @@ mod tests { text: ", world!".to_string(), }), ]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!(content_str, Some("Hello, world!".to_string())); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, "1"); assert_eq!(tool_calls[0].function.name, "test_tool"); assert_eq!(tool_calls[0].function.arguments, "{}"); + assert!(thoughts.is_empty()); + let content: Vec = vec![]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!(content_str, None); assert!(tool_calls.is_empty()); + assert!(thoughts.is_empty()); let content = vec![ ContentBlockChatOutput::Text(Text { @@ -1527,7 +1600,7 @@ mod tests { text: " fourth part".to_string(), }), ]; - let (content_str, tool_calls) = process_chat_content(content); + let (content_str, tool_calls, thoughts) = process_chat_content(content); assert_eq!( content_str, Some("First part second part third part 
fourth part".to_string()) @@ -1536,6 +1609,7 @@ mod tests { assert_eq!(tool_calls[0].id, "123"); assert_eq!(tool_calls[0].function.name, "middle_tool"); assert_eq!(tool_calls[0].function.arguments, "{\"key\": \"value\"}"); + assert!(thoughts.is_empty()); } #[test] @@ -1616,4 +1690,234 @@ mod tests { } ); } + + #[test] + fn test_process_chat_content_with_thoughts() { + let content = vec![ + ContentBlockChatOutput::Thought(Thought { + text: Some("Let me think about this...".to_string()), + signature: Some("sig123".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }), + ContentBlockChatOutput::Text(Text { + text: "Hello".to_string(), + }), + ContentBlockChatOutput::Thought(Thought { + text: Some("Another thought".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }), + ]; + let (content_str, tool_calls, thoughts) = process_chat_content(content); + assert_eq!( + content_str, + Some("Hello".to_string()), + "Text content should be extracted" + ); + assert!(tool_calls.is_empty(), "No tool calls expected"); + assert_eq!(thoughts.len(), 2, "Should have two thoughts"); + + // First thought at index 0 + assert_eq!(thoughts[0].index, 0, "First thought should have index 0"); + assert_eq!( + thoughts[0].thought.text, + Some("Let me think about this...".to_string()) + ); + assert_eq!(thoughts[0].thought.signature, Some("sig123".to_string())); + assert_eq!( + thoughts[0].thought.provider_type, + Some("anthropic".to_string()) + ); + + // Second thought at index 2 + assert_eq!(thoughts[1].index, 2, "Second thought should have index 2"); + assert_eq!( + thoughts[1].thought.text, + Some("Another thought".to_string()) + ); + assert_eq!(thoughts[1].thought.signature, None); + } + + #[test] + fn test_input_reasoning_content_ordering() { + // Test with indexed and unindexed thoughts + let messages = vec![OpenAICompatibleMessage::Assistant( + OpenAICompatibleAssistantMessage { + content: Some(Value::String("Response text".to_string())), + tool_calls: None, + tensorzero_reasoning_content: Some(vec![ + // Unindexed thought - should be prepended + OpenAICompatibleInputThought { + index: None, + thought: Thought { + text: Some("Unindexed thought 1".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + // Indexed thought at position 2 + OpenAICompatibleInputThought { + index: Some(2), + thought: Thought { + text: Some("Indexed thought at 2".to_string()), + signature: Some("sig456".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }, + // Another unindexed thought - should be prepended + OpenAICompatibleInputThought { + index: None, + thought: Thought { + text: Some("Unindexed thought 2".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + // Indexed thought at position 0 + OpenAICompatibleInputThought { + index: Some(0), + thought: Thought { + text: Some("Indexed thought at 0".to_string()), + signature: None, + summary: None, + provider_type: None, + extra_data: None, + }, + }, + ]), + }, + )]; + let input: Input = openai_messages_to_input(messages).unwrap(); + assert_eq!(input.messages.len(), 1, "Should have one message"); + let content = &input.messages[0].content; + + // Expected order based on the algorithm: + // 1. Start with text content: ["Response text"] + // 2. 
Apply indexed thoughts (sorted by index): + // - Insert at 0: ["Indexed thought at 0", "Response text"] + // - Insert at 2 (capped to len=2): ["Indexed thought at 0", "Response text", "Indexed thought at 2"] + // 3. Prepend unindexed thoughts (reversed to maintain order): + // - Prepend "Unindexed thought 2": ["Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + // - Prepend "Unindexed thought 1": ["Unindexed thought 1", "Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + assert_eq!(content.len(), 5, "Should have 5 content blocks"); + + // Verify content order + match &content[0] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 1".to_string()), + "First should be unindexed thought 1" + ); + } + _ => panic!("Expected Thought at position 0"), + } + match &content[1] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 2".to_string()), + "Second should be unindexed thought 2" + ); + } + _ => panic!("Expected Thought at position 1"), + } + match &content[2] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Indexed thought at 0".to_string()), + "Third should be indexed thought at 0" + ); + } + _ => panic!("Expected Thought at position 2"), + } + match &content[3] { + InputMessageContent::Text(t) => { + assert_eq!(t.text, "Response text", "Fourth should be the text content"); + } + _ => panic!("Expected Text at position 3"), + } + match &content[4] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Indexed thought at 2".to_string()), + "Fifth should be indexed thought at 2" + ); + assert_eq!( + t.signature, + Some("sig456".to_string()), + "Should preserve signature" + ); + assert_eq!( + t.provider_type, + Some("anthropic".to_string()), + "Should preserve provider_type" + ); + } + _ => panic!("Expected Thought at position 4"), + } + } + + #[test] + fn test_openai_compatible_input_thought_deserialization() { + // Test that serde(flatten) works correctly for input thoughts + let json_with_index = json!({ + "index": 5, + "text": "My thought", + "signature": "sig789", + "provider_type": "anthropic" + }); + let thought: OpenAICompatibleInputThought = + serde_json::from_value(json_with_index).unwrap(); + assert_eq!(thought.index, Some(5)); + assert_eq!(thought.thought.text, Some("My thought".to_string())); + assert_eq!(thought.thought.signature, Some("sig789".to_string())); + assert_eq!(thought.thought.provider_type, Some("anthropic".to_string())); + + // Test without index + let json_without_index = json!({ + "text": "Another thought" + }); + let thought: OpenAICompatibleInputThought = + serde_json::from_value(json_without_index).unwrap(); + assert_eq!(thought.index, None); + assert_eq!(thought.thought.text, Some("Another thought".to_string())); + assert_eq!(thought.thought.signature, None); + } + + #[test] + fn test_openai_compatible_thought_serialization() { + // Test that serde(flatten) works correctly for output thoughts + let thought = OpenAICompatibleThought { + index: 3, + thought: Thought { + text: Some("Thinking...".to_string()), + signature: Some("sig_abc".to_string()), + summary: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&thought).unwrap(); + + // With flatten, all fields should be at the top level + assert_eq!(json["index"], 3); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_abc"); + 
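        // For reference, a value like the one asserted here would serialize to
        // roughly the following JSON (a sketch; serde_json object key order may vary):
        //   {"index": 3, "text": "Thinking...", "signature": "sig_abc", "provider_type": "anthropic"}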
assert_eq!(json["provider_type"], "anthropic"); + // summary should not be present since it's None and has skip_serializing_if + assert!(json.get("summary").is_none() || json["summary"].is_null()); + } } diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs index 41a599f677..b23fb6afdb 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use tokio_stream::StreamExt; use crate::error::{Error, ErrorDetails}; +use crate::inference::types::streams::ThoughtChunk; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ContentBlockChunk, FinishReason, current_timestamp}; @@ -60,6 +61,14 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } +/// OpenAI-compatible ThoughtChunk for streaming responses (with index for position in content array) +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct OpenAICompatibleThoughtChunk { + pub index: usize, + #[serde(flatten)] + pub chunk: ThoughtChunk, +} + #[derive(Clone, Debug, PartialEq, Serialize)] pub struct OpenAICompatibleDelta { #[serde(skip_serializing_if = "Option::is_none")] @@ -68,12 +77,15 @@ pub struct OpenAICompatibleDelta { pub content: Option, #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, + #[serde(skip_serializing_if = "is_none_or_empty")] + pub tensorzero_reasoning_content: Option>, } #[expect(clippy::too_many_arguments)] pub fn convert_inference_response_chunk_to_openai_compatible( chunk: InferenceResponseChunk, tool_id_to_index: &mut HashMap, + thought_id_to_index: &mut HashMap, response_model_prefix: &str, is_first_chunk: bool, include_usage: bool, @@ -90,7 +102,8 @@ pub fn convert_inference_response_chunk_to_openai_compatible( let response_chunk = match chunk { InferenceResponseChunk::Chat(c) => { - let (content, tool_calls) = process_chat_content_chunk(c.content, tool_id_to_index); + let (content, tool_calls, thoughts) = + process_chat_content_chunk(c.content, tool_id_to_index, thought_id_to_index); let usage = if include_usage { c.usage.map(OpenAICompatibleUsage::from) } else { @@ -123,7 +136,16 @@ pub fn convert_inference_response_chunk_to_openai_compatible( delta: OpenAICompatibleDelta { role: role.clone(), content, - tool_calls: Some(tool_calls), + tool_calls: if tool_calls.is_empty() { + None + } else { + Some(tool_calls) + }, + tensorzero_reasoning_content: if thoughts.is_empty() { + None + } else { + Some(thoughts) + }, }, }], created: current_timestamp() as u32, @@ -172,6 +194,7 @@ pub fn convert_inference_response_chunk_to_openai_compatible( role, content: Some(c.raw), tool_calls: None, + tensorzero_reasoning_content: None, }, }], created: current_timestamp() as u32, @@ -194,9 +217,15 @@ pub fn convert_inference_response_chunk_to_openai_compatible( pub fn process_chat_content_chunk( content: Vec, tool_id_to_index: &mut HashMap, -) -> (Option, Vec) { + thought_id_to_index: &mut HashMap, +) -> ( + Option, + Vec, + Vec, +) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); + let mut thoughts = Vec::new(); for block in content { match block { ContentBlockChunk::Text(text) => match content_str { @@ -217,12 +246,13 @@ pub fn process_chat_content_chunk( }, }); } - ContentBlockChunk::Thought(_thought) => { - // OpenAI compatible endpoint does not support thought 
blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'thought' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Thought(thought) => { + let len = thought_id_to_index.len(); + let index = *thought_id_to_index.entry(thought.id.clone()).or_insert(len); + thoughts.push(OpenAICompatibleThoughtChunk { + index, + chunk: thought, + }); } ContentBlockChunk::Unknown(_) => { // OpenAI compatible endpoint does not support unknown blocks @@ -233,7 +263,7 @@ pub fn process_chat_content_chunk( } } } - (content_str, tool_calls) + (content_str, tool_calls, thoughts) } /// Prepares an Event for SSE on the way out of the gateway. @@ -249,6 +279,7 @@ pub fn prepare_serialized_openai_compatible_events( ) -> impl Stream> { async_stream::stream! { let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let mut is_first_chunk = true; while let Some(chunk) = stream.next().await { @@ -261,6 +292,7 @@ pub fn prepare_serialized_openai_compatible_events( let openai_compatible_chunks = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, &response_model_prefix, is_first_chunk, include_usage, @@ -313,9 +345,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -369,9 +403,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -416,9 +452,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -473,9 +511,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -513,9 +553,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, true, // include_usage @@ -564,9 +606,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage = false @@ -606,9 +650,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -653,9 +699,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = 
convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -692,9 +740,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, + &mut thought_id_to_index, "test_prefix::", true, false, // include_usage @@ -734,18 +784,23 @@ mod tests { }), ]; let mut tool_id_to_index = HashMap::new(); - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!(content_str, Some("Hello, world!".to_string())); assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls[0].id, Some("1".to_string())); assert_eq!(tool_calls[0].index, 0); assert_eq!(tool_calls[0].function.name, "test_tool".to_string()); assert_eq!(tool_calls[0].function.arguments, "{}"); + assert!(thoughts.is_empty()); let content: Vec = vec![]; - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!(content_str, None); assert!(tool_calls.is_empty()); + assert!(thoughts.is_empty()); let content = vec![ ContentBlockChunk::Text(TextChunk { @@ -776,7 +831,9 @@ mod tests { }), ]; let mut tool_id_to_index = HashMap::new(); - let (content_str, tool_calls) = process_chat_content_chunk(content, &mut tool_id_to_index); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); assert_eq!( content_str, Some("First part second part third part fourth part".to_string()) @@ -790,5 +847,124 @@ mod tests { assert_eq!(tool_calls[1].index, 1); assert_eq!(tool_calls[1].function.name, "last_tool".to_string()); assert_eq!(tool_calls[1].function.arguments, "{\"key\": \"value\"}"); + assert!(thoughts.is_empty()); + } + + #[test] + fn test_process_chat_content_chunk_with_thoughts() { + let content = vec![ + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_1".to_string(), + text: Some("Let me think...".to_string()), + signature: Some("sig123".to_string()), + summary_id: None, + summary_text: None, + provider_type: Some("anthropic".to_string()), + extra_data: None, + }), + ContentBlockChunk::Text(TextChunk { + id: "text_1".to_string(), + text: "Response text".to_string(), + }), + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_1".to_string(), // Same ID, new chunk + text: Some(" more thinking".to_string()), + signature: None, + summary_id: None, + summary_text: None, + provider_type: None, + extra_data: None, + }), + ContentBlockChunk::Thought(ThoughtChunk { + id: "thought_2".to_string(), // New thought + text: Some("Different thought".to_string()), + signature: None, + summary_id: None, + summary_text: None, + provider_type: None, + extra_data: None, + }), + ]; + + let mut tool_id_to_index = HashMap::new(); + let mut thought_id_to_index = HashMap::new(); + let (content_str, tool_calls, thoughts) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); + + assert_eq!( + content_str, + Some("Response text".to_string()), + "Text content should be extracted" 
+ ); + assert!(tool_calls.is_empty(), "No tool calls expected"); + assert_eq!(thoughts.len(), 3, "Should have three thought chunks"); + + // First thought chunk + assert_eq!( + thoughts[0].index, 0, + "First thought should have index 0 (assigned based on thought_1)" + ); + assert_eq!(thoughts[0].chunk.id, "thought_1"); + assert_eq!(thoughts[0].chunk.text, Some("Let me think...".to_string())); + assert_eq!(thoughts[0].chunk.signature, Some("sig123".to_string())); + assert_eq!( + thoughts[0].chunk.provider_type, + Some("anthropic".to_string()) + ); + + // Second thought chunk (same thought ID, should have same index) + assert_eq!( + thoughts[1].index, 0, + "Second chunk of thought_1 should still have index 0" + ); + assert_eq!(thoughts[1].chunk.id, "thought_1"); + assert_eq!(thoughts[1].chunk.text, Some(" more thinking".to_string())); + + // Third thought chunk (new thought ID, should have new index) + assert_eq!(thoughts[2].index, 1, "New thought_2 should have index 1"); + assert_eq!(thoughts[2].chunk.id, "thought_2"); + assert_eq!( + thoughts[2].chunk.text, + Some("Different thought".to_string()) + ); + + // Verify the hashmap tracked the thought IDs correctly + assert_eq!( + thought_id_to_index.get("thought_1"), + Some(&0), + "thought_1 should be mapped to index 0" + ); + assert_eq!( + thought_id_to_index.get("thought_2"), + Some(&1), + "thought_2 should be mapped to index 1" + ); + } + + #[test] + fn test_openai_compatible_thought_chunk_serialization() { + // Test that serde(flatten) works correctly for streaming thought chunks + let chunk = OpenAICompatibleThoughtChunk { + index: 2, + chunk: ThoughtChunk { + id: "chunk_id".to_string(), + text: Some("Thinking...".to_string()), + signature: Some("sig_chunk".to_string()), + summary_id: Some("summary_1".to_string()), + summary_text: Some("Summary text".to_string()), + provider_type: Some("anthropic".to_string()), + extra_data: None, + }, + }; + let json = serde_json::to_value(&chunk).unwrap(); + + // With flatten, all fields should be at the top level + assert_eq!(json["index"], 2); + assert_eq!(json["id"], "chunk_id"); + assert_eq!(json["text"], "Thinking..."); + assert_eq!(json["signature"], "sig_chunk"); + assert_eq!(json["summary_id"], "summary_1"); + assert_eq!(json["summary_text"], "Summary text"); + assert_eq!(json["provider_type"], "anthropic"); } } From 48e0fae5c053cd4ff004103927368f51b77b71c6 Mon Sep 17 00:00:00 2001 From: Gabriel Bianconi <1275491+GabrielBianconi@users.noreply.github.com> Date: Fri, 30 Jan 2026 10:20:40 -0500 Subject: [PATCH 2/2] Fix --- .../types/chat_completions.rs | 428 +++++++++++------- .../openai_compatible/types/streaming.rs | 274 ++++++----- 2 files changed, 448 insertions(+), 254 deletions(-) diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs index 065c98c232..145653406f 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/chat_completions.rs @@ -34,7 +34,7 @@ use crate::inference::types::extra_headers::UnfilteredInferenceExtraHeaders; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ Arguments, ContentBlockChatOutput, FinishReason, Input, InputMessage, InputMessageContent, - RawText, Role, System, Template, Text, Thought, current_timestamp, + RawText, Role, System, Template, Text, Thought, Unknown, current_timestamp, }; use 
crate::tool::{DynamicToolParams, ProviderTool, ToolResult}; use crate::variant::JsonMode; @@ -58,7 +58,7 @@ pub struct OpenAICompatibleAssistantMessage { pub content: Option, pub tool_calls: Option>, #[serde(default)] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[derive(Clone, Debug, PartialEq, TensorZeroDeserialize)] @@ -176,7 +176,7 @@ pub struct OpenAICompatibleResponseMessage { pub tool_calls: Option>, pub role: String, #[serde(skip_serializing_if = "is_none_or_empty")] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -235,20 +235,36 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } -/// OpenAI-compatible Thought for responses (with index for position in content array) +/// Extra content block for OpenAI-compatible responses (Thought or Unknown) #[derive(Clone, Debug, PartialEq, Serialize)] -pub struct OpenAICompatibleThought { - pub index: usize, - #[serde(flatten)] - pub thought: Thought, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExtraContentBlock { + Thought { + insert_index: usize, + #[serde(flatten)] + thought: Thought, + }, + Unknown { + insert_index: usize, + #[serde(flatten)] + unknown: Unknown, + }, } -/// OpenAI-compatible Thought for input (with optional index) +/// Extra content block for OpenAI-compatible input (with optional insert_index) #[derive(Clone, Debug, Deserialize, PartialEq)] -pub struct OpenAICompatibleInputThought { - pub index: Option, - #[serde(flatten)] - pub thought: Thought, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum InputExtraContentBlock { + Thought { + insert_index: Option, + #[serde(flatten)] + thought: Thought, + }, + Unknown { + insert_index: Option, + #[serde(flatten)] + unknown: Unknown, + }, } // ============================================================================ @@ -579,29 +595,48 @@ pub fn openai_messages_to_input( message_content.push(InputMessageContent::ToolCall(tool_call.into())); } } - // Process reasoning content with index-based insertion - if let Some(thoughts) = msg.tensorzero_reasoning_content { - // Collect indexed thoughts (with explicit index) and unindexed thoughts separately - let mut indexed: Vec<(usize, Thought)> = Vec::new(); - let mut unindexed: Vec = Vec::new(); - for input_thought in thoughts { - match input_thought.index { - Some(idx) => indexed.push((idx, input_thought.thought)), - None => unindexed.push(input_thought.thought), + // Process extra content with index-based insertion + if let Some(extra_content) = msg.tensorzero_extra_content_experimental { + // First pass: items with insert_index (in input order) + for block in &extra_content { + match block { + InputExtraContentBlock::Thought { + insert_index: Some(idx), + thought, + } => { + let idx = (*idx).min(message_content.len()); + message_content + .insert(idx, InputMessageContent::Thought(thought.clone())); + } + InputExtraContentBlock::Unknown { + insert_index: Some(idx), + unknown, + } => { + let idx = (*idx).min(message_content.len()); + message_content + .insert(idx, InputMessageContent::Unknown(unknown.clone())); + } + _ => {} // Handle in second pass } } - // First, insert thoughts with explicit indices at their specified positions - // Sort by index to handle insertions correctly - indexed.sort_by_key(|(idx, _)| *idx); - for (idx, thought) in indexed { - let idx = idx.min(message_content.len()); - 
message_content.insert(idx, InputMessageContent::Thought(thought)); - } - - // Then, prepend all thoughts without an index at position 0 (reverse to maintain order) - for thought in unindexed.into_iter().rev() { - message_content.insert(0, InputMessageContent::Thought(thought)); + // Second pass: items without insert_index (append to end) + for block in extra_content { + match block { + InputExtraContentBlock::Thought { + insert_index: None, + thought, + } => { + message_content.push(InputMessageContent::Thought(thought)); + } + InputExtraContentBlock::Unknown { + insert_index: None, + unknown, + } => { + message_content.push(InputMessageContent::Unknown(unknown)); + } + _ => {} // Already handled + } } } messages.push(InputMessage { @@ -741,7 +776,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse ) -> Self { match inference_response { InferenceResponse::Chat(response) => { - let (content, tool_calls, thoughts) = process_chat_content(response.content); + let (content, tool_calls, extra_content) = process_chat_content(response.content); let tensorzero_original_response = if include_original_response { response.original_response } else { @@ -766,10 +801,10 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse Some(tool_calls) }, role: "assistant".to_string(), - tensorzero_reasoning_content: if thoughts.is_empty() { + tensorzero_extra_content_experimental: if extra_content.is_empty() { None } else { - Some(thoughts) + Some(extra_content) }, }, }], @@ -806,7 +841,7 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse content: response.output.raw, tool_calls: None, role: "assistant".to_string(), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, }], created: current_timestamp() as u32, @@ -826,19 +861,19 @@ impl From<(InferenceResponse, String, bool, bool)> for OpenAICompatibleResponse } /// Takes a vector of ContentBlockOutput and returns a tuple of -/// (Option, Vec, Vec). -/// This is useful since the OpenAI format separates text, tool calls, and reasoning in the response fields. +/// (Option, Vec, Vec). +/// This is useful since the OpenAI format separates text, tool calls, and extra content in the response fields. 
pub fn process_chat_content( content: Vec, ) -> ( Option, Vec, - Vec, + Vec, ) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - let mut thoughts = Vec::new(); - for (index, block) in content.into_iter().enumerate() { + let mut extra_content = Vec::new(); + for (insert_index, block) in content.into_iter().enumerate() { match block { ContentBlockChatOutput::Text(text) => match content_str { Some(ref mut content) => content.push_str(&text.text), @@ -848,16 +883,20 @@ pub fn process_chat_content( tool_calls.push(tool_call.into()); } ContentBlockChatOutput::Thought(thought) => { - thoughts.push(OpenAICompatibleThought { index, thought }); + extra_content.push(ExtraContentBlock::Thought { + insert_index, + thought, + }); } - ContentBlockChatOutput::Unknown(_) => { - tracing::warn!( - "Ignoring 'unknown' content block when constructing OpenAI-compatible response" - ); + ContentBlockChatOutput::Unknown(unknown) => { + extra_content.push(ExtraContentBlock::Unknown { + insert_index, + unknown, + }); } } } - (content_str, tool_calls, thoughts) + (content_str, tool_calls, extra_content) } #[cfg(test)] @@ -926,7 +965,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -938,7 +977,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: None, @@ -950,7 +989,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::Tool(OpenAICompatibleToolMessage { content: Some(Value::String("Tool result 1".to_string())), @@ -1134,7 +1173,7 @@ mod tests { "city": "Tokyo", }])), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1168,7 +1207,7 @@ mod tests { arguments: "{}".to_string(), }, }]), - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, )]; let input: Input = openai_messages_to_input(messages).unwrap(); @@ -1202,7 +1241,7 @@ mod tests { OpenAICompatibleMessage::Assistant(OpenAICompatibleAssistantMessage { content: Some(Value::String("Assistant message".to_string())), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }), OpenAICompatibleMessage::System(OpenAICompatibleSystemMessage { content: Value::String("System message".to_string()), @@ -1692,7 +1731,7 @@ mod tests { } #[test] - fn test_process_chat_content_with_thoughts() { + fn test_process_chat_content_with_extra_content() { let content = vec![ ContentBlockChatOutput::Thought(Thought { text: Some("Let me think about this...".to_string()), @@ -1704,6 +1743,11 @@ mod tests { ContentBlockChatOutput::Text(Text { text: "Hello".to_string(), }), + ContentBlockChatOutput::Unknown(Unknown { + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }), ContentBlockChatOutput::Thought(Thought { text: Some("Another thought".to_string()), signature: None, @@ -1712,47 +1756,74 @@ mod tests { extra_data: None, }), ]; - let (content_str, tool_calls, thoughts) = process_chat_content(content); + let (content_str, tool_calls, 
extra_content) = process_chat_content(content); assert_eq!( content_str, Some("Hello".to_string()), "Text content should be extracted" ); assert!(tool_calls.is_empty(), "No tool calls expected"); - assert_eq!(thoughts.len(), 2, "Should have two thoughts"); - - // First thought at index 0 - assert_eq!(thoughts[0].index, 0, "First thought should have index 0"); - assert_eq!( - thoughts[0].thought.text, - Some("Let me think about this...".to_string()) - ); - assert_eq!(thoughts[0].thought.signature, Some("sig123".to_string())); assert_eq!( - thoughts[0].thought.provider_type, - Some("anthropic".to_string()) + extra_content.len(), + 3, + "Should have three extra content blocks" ); - // Second thought at index 2 - assert_eq!(thoughts[1].index, 2, "Second thought should have index 2"); - assert_eq!( - thoughts[1].thought.text, - Some("Another thought".to_string()) - ); - assert_eq!(thoughts[1].thought.signature, None); + // First: Thought at insert_index 0 + match &extra_content[0] { + ExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(*insert_index, 0, "First thought should have insert_index 0"); + assert_eq!(thought.text, Some("Let me think about this...".to_string())); + assert_eq!(thought.signature, Some("sig123".to_string())); + assert_eq!(thought.provider_type, Some("anthropic".to_string())); + } + ExtraContentBlock::Unknown { .. } => panic!("Expected Thought at position 0"), + } + + // Second: Unknown at insert_index 2 + match &extra_content[1] { + ExtraContentBlock::Unknown { + insert_index, + unknown, + } => { + assert_eq!(*insert_index, 2, "Unknown should have insert_index 2"); + assert_eq!(unknown.data, serde_json::json!({"custom": "data"})); + assert_eq!(unknown.model_name, Some("test_model".to_string())); + } + ExtraContentBlock::Thought { .. } => panic!("Expected Unknown at position 1"), + } + + // Third: Thought at insert_index 3 + match &extra_content[2] { + ExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!( + *insert_index, 3, + "Second thought should have insert_index 3" + ); + assert_eq!(thought.text, Some("Another thought".to_string())); + assert_eq!(thought.signature, None); + } + ExtraContentBlock::Unknown { .. 
} => panic!("Expected Thought at position 2"), + } } #[test] - fn test_input_reasoning_content_ordering() { - // Test with indexed and unindexed thoughts + fn test_input_extra_content_ordering() { + // Test with indexed and unindexed extra content blocks let messages = vec![OpenAICompatibleMessage::Assistant( OpenAICompatibleAssistantMessage { content: Some(Value::String("Response text".to_string())), tool_calls: None, - tensorzero_reasoning_content: Some(vec![ - // Unindexed thought - should be prepended - OpenAICompatibleInputThought { - index: None, + tensorzero_extra_content_experimental: Some(vec![ + // Unindexed thought - should be appended + InputExtraContentBlock::Thought { + insert_index: None, thought: Thought { text: Some("Unindexed thought 1".to_string()), signature: None, @@ -1762,8 +1833,8 @@ mod tests { }, }, // Indexed thought at position 2 - OpenAICompatibleInputThought { - index: Some(2), + InputExtraContentBlock::Thought { + insert_index: Some(2), thought: Thought { text: Some("Indexed thought at 2".to_string()), signature: Some("sig456".to_string()), @@ -1772,20 +1843,18 @@ mod tests { extra_data: None, }, }, - // Another unindexed thought - should be prepended - OpenAICompatibleInputThought { - index: None, - thought: Thought { - text: Some("Unindexed thought 2".to_string()), - signature: None, - summary: None, - provider_type: None, - extra_data: None, + // Unindexed unknown - should be appended + InputExtraContentBlock::Unknown { + insert_index: None, + unknown: Unknown { + data: serde_json::json!({"test": "data"}), + model_name: None, + provider_name: None, }, }, // Indexed thought at position 0 - OpenAICompatibleInputThought { - index: Some(0), + InputExtraContentBlock::Thought { + insert_index: Some(0), thought: Thought { text: Some("Indexed thought at 0".to_string()), signature: None, @@ -1803,12 +1872,12 @@ mod tests { // Expected order based on the algorithm: // 1. Start with text content: ["Response text"] - // 2. Apply indexed thoughts (sorted by index): - // - Insert at 0: ["Indexed thought at 0", "Response text"] - // - Insert at 2 (capped to len=2): ["Indexed thought at 0", "Response text", "Indexed thought at 2"] - // 3. Prepend unindexed thoughts (reversed to maintain order): - // - Prepend "Unindexed thought 2": ["Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] - // - Prepend "Unindexed thought 1": ["Unindexed thought 1", "Unindexed thought 2", "Indexed thought at 0", "Response text", "Indexed thought at 2"] + // 2. First pass - items with insert_index (in input order): + // - "Indexed thought at 2" insert at min(2, 1)=1: ["Response text", "Indexed at 2"] + // - "Indexed thought at 0" insert at min(0, 2)=0: ["Indexed at 0", "Response text", "Indexed at 2"] + // 3. 
Second pass - items without insert_index (append to end): + // - Append "Unindexed thought 1": ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1"] + // - Append Unknown: ["Indexed at 0", "Response text", "Indexed at 2", "Unindexed 1", Unknown] assert_eq!(content.len(), 5, "Should have 5 content blocks"); // Verify content order @@ -1816,44 +1885,24 @@ mod tests { InputMessageContent::Thought(t) => { assert_eq!( t.text, - Some("Unindexed thought 1".to_string()), - "First should be unindexed thought 1" + Some("Indexed thought at 0".to_string()), + "First should be indexed thought at 0" ); } _ => panic!("Expected Thought at position 0"), } match &content[1] { - InputMessageContent::Thought(t) => { - assert_eq!( - t.text, - Some("Unindexed thought 2".to_string()), - "Second should be unindexed thought 2" - ); - } - _ => panic!("Expected Thought at position 1"), - } - match &content[2] { - InputMessageContent::Thought(t) => { - assert_eq!( - t.text, - Some("Indexed thought at 0".to_string()), - "Third should be indexed thought at 0" - ); - } - _ => panic!("Expected Thought at position 2"), - } - match &content[3] { InputMessageContent::Text(t) => { - assert_eq!(t.text, "Response text", "Fourth should be the text content"); + assert_eq!(t.text, "Response text", "Second should be the text content"); } - _ => panic!("Expected Text at position 3"), + _ => panic!("Expected Text at position 1"), } - match &content[4] { + match &content[2] { InputMessageContent::Thought(t) => { assert_eq!( t.text, Some("Indexed thought at 2".to_string()), - "Fifth should be indexed thought at 2" + "Third should be indexed thought at 2" ); assert_eq!( t.signature, @@ -1866,42 +1915,100 @@ mod tests { "Should preserve provider_type" ); } - _ => panic!("Expected Thought at position 4"), + _ => panic!("Expected Thought at position 2"), + } + match &content[3] { + InputMessageContent::Thought(t) => { + assert_eq!( + t.text, + Some("Unindexed thought 1".to_string()), + "Fourth should be unindexed thought 1 (appended)" + ); + } + _ => panic!("Expected Thought at position 3"), + } + match &content[4] { + InputMessageContent::Unknown(u) => { + assert_eq!( + u.data, + serde_json::json!({"test": "data"}), + "Fifth should be the unknown block (appended)" + ); + } + _ => panic!("Expected Unknown at position 4"), } } #[test] - fn test_openai_compatible_input_thought_deserialization() { - // Test that serde(flatten) works correctly for input thoughts - let json_with_index = json!({ - "index": 5, + fn test_input_extra_content_block_deserialization() { + // Test Thought variant deserialization with insert_index + let json_thought_with_index = json!({ + "type": "thought", + "insert_index": 5, "text": "My thought", "signature": "sig789", "provider_type": "anthropic" }); - let thought: OpenAICompatibleInputThought = - serde_json::from_value(json_with_index).unwrap(); - assert_eq!(thought.index, Some(5)); - assert_eq!(thought.thought.text, Some("My thought".to_string())); - assert_eq!(thought.thought.signature, Some("sig789".to_string())); - assert_eq!(thought.thought.provider_type, Some("anthropic".to_string())); - - // Test without index - let json_without_index = json!({ + let block: InputExtraContentBlock = + serde_json::from_value(json_thought_with_index).unwrap(); + match block { + InputExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(insert_index, Some(5)); + assert_eq!(thought.text, Some("My thought".to_string())); + assert_eq!(thought.signature, Some("sig789".to_string())); + 
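                // Deserialization contract exercised here: the "type" tag selects the
                // enum variant, "insert_index" stays a variant-level field, and the
                // remaining keys are flattened into the inner `Thought` via #[serde(flatten)].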
assert_eq!(thought.provider_type, Some("anthropic".to_string())); + } + InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"), + } + + // Test Thought variant deserialization without insert_index + let json_thought_without_index = json!({ + "type": "thought", "text": "Another thought" }); - let thought: OpenAICompatibleInputThought = - serde_json::from_value(json_without_index).unwrap(); - assert_eq!(thought.index, None); - assert_eq!(thought.thought.text, Some("Another thought".to_string())); - assert_eq!(thought.thought.signature, None); + let block: InputExtraContentBlock = + serde_json::from_value(json_thought_without_index).unwrap(); + match block { + InputExtraContentBlock::Thought { + insert_index, + thought, + } => { + assert_eq!(insert_index, None); + assert_eq!(thought.text, Some("Another thought".to_string())); + assert_eq!(thought.signature, None); + } + InputExtraContentBlock::Unknown { .. } => panic!("Expected Thought variant"), + } + + // Test Unknown variant deserialization + let json_unknown = json!({ + "type": "unknown", + "insert_index": 2, + "data": {"custom": "value"}, + "model_name": "test_model" + }); + let block: InputExtraContentBlock = serde_json::from_value(json_unknown).unwrap(); + match block { + InputExtraContentBlock::Unknown { + insert_index, + unknown, + } => { + assert_eq!(insert_index, Some(2)); + assert_eq!(unknown.data, json!({"custom": "value"})); + assert_eq!(unknown.model_name, Some("test_model".to_string())); + } + InputExtraContentBlock::Thought { .. } => panic!("Expected Unknown variant"), + } } #[test] - fn test_openai_compatible_thought_serialization() { - // Test that serde(flatten) works correctly for output thoughts - let thought = OpenAICompatibleThought { - index: 3, + fn test_extra_content_block_serialization() { + // Test Thought variant serialization + let thought_block = ExtraContentBlock::Thought { + insert_index: 3, thought: Thought { text: Some("Thinking...".to_string()), signature: Some("sig_abc".to_string()), @@ -1910,14 +2017,29 @@ mod tests { extra_data: None, }, }; - let json = serde_json::to_value(&thought).unwrap(); + let json = serde_json::to_value(&thought_block).unwrap(); - // With flatten, all fields should be at the top level - assert_eq!(json["index"], 3); + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 3); assert_eq!(json["text"], "Thinking..."); assert_eq!(json["signature"], "sig_abc"); assert_eq!(json["provider_type"], "anthropic"); - // summary should not be present since it's None and has skip_serializing_if - assert!(json.get("summary").is_none() || json["summary"].is_null()); + + // Test Unknown variant serialization + let unknown_block = ExtraContentBlock::Unknown { + insert_index: 5, + unknown: Unknown { + data: json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_block).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["data"], json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); } } diff --git a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs index b23fb6afdb..b2396bfd9c 100644 --- a/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs +++ b/tensorzero-core/src/endpoints/openai_compatible/types/streaming.rs @@ 
-11,7 +11,7 @@ use std::collections::HashMap; use tokio_stream::StreamExt; use crate::error::{Error, ErrorDetails}; -use crate::inference::types::streams::ThoughtChunk; +use crate::inference::types::streams::{ThoughtChunk, UnknownChunk}; use crate::inference::types::usage::{RawResponseEntry, RawUsageEntry}; use crate::inference::types::{ContentBlockChunk, FinishReason, current_timestamp}; @@ -61,12 +61,20 @@ fn is_none_or_empty(v: &Option>) -> bool { v.as_ref().is_none_or(Vec::is_empty) } -/// OpenAI-compatible ThoughtChunk for streaming responses (with index for position in content array) +/// Extra content block chunk for streaming responses (Thought or Unknown) #[derive(Clone, Debug, PartialEq, Serialize)] -pub struct OpenAICompatibleThoughtChunk { - pub index: usize, - #[serde(flatten)] - pub chunk: ThoughtChunk, +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExtraContentBlockChunk { + Thought { + insert_index: usize, + #[serde(flatten)] + chunk: ThoughtChunk, + }, + Unknown { + insert_index: usize, + #[serde(flatten)] + chunk: UnknownChunk, + }, } #[derive(Clone, Debug, PartialEq, Serialize)] @@ -78,14 +86,14 @@ pub struct OpenAICompatibleDelta { #[serde(skip_serializing_if = "is_none_or_empty")] pub tool_calls: Option>, #[serde(skip_serializing_if = "is_none_or_empty")] - pub tensorzero_reasoning_content: Option>, + pub tensorzero_extra_content_experimental: Option>, } #[expect(clippy::too_many_arguments)] pub fn convert_inference_response_chunk_to_openai_compatible( chunk: InferenceResponseChunk, tool_id_to_index: &mut HashMap, - thought_id_to_index: &mut HashMap, + extra_id_to_index: &mut HashMap, response_model_prefix: &str, is_first_chunk: bool, include_usage: bool, @@ -102,8 +110,8 @@ pub fn convert_inference_response_chunk_to_openai_compatible( let response_chunk = match chunk { InferenceResponseChunk::Chat(c) => { - let (content, tool_calls, thoughts) = - process_chat_content_chunk(c.content, tool_id_to_index, thought_id_to_index); + let (content, tool_calls, extra_content) = + process_chat_content_chunk(c.content, tool_id_to_index, extra_id_to_index); let usage = if include_usage { c.usage.map(OpenAICompatibleUsage::from) } else { @@ -141,10 +149,10 @@ pub fn convert_inference_response_chunk_to_openai_compatible( } else { Some(tool_calls) }, - tensorzero_reasoning_content: if thoughts.is_empty() { + tensorzero_extra_content_experimental: if extra_content.is_empty() { None } else { - Some(thoughts) + Some(extra_content) }, }, }], @@ -194,7 +202,7 @@ pub fn convert_inference_response_chunk_to_openai_compatible( role, content: Some(c.raw), tool_calls: None, - tensorzero_reasoning_content: None, + tensorzero_extra_content_experimental: None, }, }], created: current_timestamp() as u32, @@ -217,15 +225,15 @@ pub fn convert_inference_response_chunk_to_openai_compatible( pub fn process_chat_content_chunk( content: Vec, tool_id_to_index: &mut HashMap, - thought_id_to_index: &mut HashMap, + extra_id_to_index: &mut HashMap, ) -> ( Option, Vec, - Vec, + Vec, ) { let mut content_str: Option = None; let mut tool_calls = Vec::new(); - let mut thoughts = Vec::new(); + let mut extra_content = Vec::new(); for block in content { match block { ContentBlockChunk::Text(text) => match content_str { @@ -246,24 +254,25 @@ pub fn process_chat_content_chunk( }, }); } - ContentBlockChunk::Thought(thought) => { - let len = thought_id_to_index.len(); - let index = *thought_id_to_index.entry(thought.id.clone()).or_insert(len); - thoughts.push(OpenAICompatibleThoughtChunk { - index, - chunk: 
thought, + ContentBlockChunk::Thought(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Thought { + insert_index, + chunk, }); } - ContentBlockChunk::Unknown(_) => { - // OpenAI compatible endpoint does not support unknown blocks - // Users of this endpoint will need to check observability to see them - tracing::warn!( - "Ignoring 'unknown' content block chunk when constructing OpenAI-compatible response" - ); + ContentBlockChunk::Unknown(chunk) => { + let len = extra_id_to_index.len(); + let insert_index = *extra_id_to_index.entry(chunk.id.clone()).or_insert(len); + extra_content.push(ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + }); } } } - (content_str, tool_calls, thoughts) + (content_str, tool_calls, extra_content) } /// Prepares an Event for SSE on the way out of the gateway. @@ -279,7 +288,7 @@ pub fn prepare_serialized_openai_compatible_events( ) -> impl Stream> { async_stream::stream! { let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let mut is_first_chunk = true; while let Some(chunk) = stream.next().await { @@ -292,7 +301,7 @@ pub fn prepare_serialized_openai_compatible_events( let openai_compatible_chunks = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, &response_model_prefix, is_first_chunk, include_usage, @@ -345,11 +354,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -403,11 +412,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -452,11 +461,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -511,11 +520,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -553,11 +562,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + let mut extra_id_to_index = HashMap::new(); let result = convert_inference_response_chunk_to_openai_compatible( chunk, &mut tool_id_to_index, - &mut thought_id_to_index, + &mut extra_id_to_index, "test_prefix::", true, true, // include_usage @@ -606,11 +615,11 @@ mod tests { }); let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); + 
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage = false
@@ -650,11 +659,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -699,11 +708,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -740,11 +749,11 @@ mod tests {
         });
 
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
+        let mut extra_id_to_index = HashMap::new();
         let result = convert_inference_response_chunk_to_openai_compatible(
             chunk,
             &mut tool_id_to_index,
-            &mut thought_id_to_index,
+            &mut extra_id_to_index,
             "test_prefix::",
             true,
             false, // include_usage
@@ -784,23 +793,23 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, Some("Hello, world!".to_string()));
         assert_eq!(tool_calls.len(), 1);
         assert_eq!(tool_calls[0].id, Some("1".to_string()));
         assert_eq!(tool_calls[0].index, 0);
         assert_eq!(tool_calls[0].function.name, "test_tool".to_string());
         assert_eq!(tool_calls[0].function.arguments, "{}");
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
 
         let content: Vec<ContentBlockChunk> = vec![];
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(content_str, None);
         assert!(tool_calls.is_empty());
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
 
         let content = vec![
             ContentBlockChunk::Text(TextChunk {
@@ -831,9 +840,9 @@ mod tests {
             }),
         ];
         let mut tool_id_to_index = HashMap::new();
-        let mut thought_id_to_index = HashMap::new();
-        let (content_str, tool_calls, thoughts) =
-            process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index);
+        let mut extra_id_to_index = HashMap::new();
+        let (content_str, tool_calls, extra_content) =
+            process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index);
         assert_eq!(
             content_str,
             Some("First part second part third part fourth part".to_string())
         );
         assert_eq!(tool_calls[1].id, Some("2".to_string()));
         assert_eq!(tool_calls[1].index, 1);
         assert_eq!(tool_calls[1].function.name, "last_tool".to_string());
         assert_eq!(tool_calls[1].function.arguments, "{\"key\": \"value\"}");
-        assert!(thoughts.is_empty());
+        assert!(extra_content.is_empty());
     }
 
     #[test]
-    fn test_process_chat_content_chunk_with_thoughts() {
+    fn
test_process_chat_content_chunk_with_extra_content() { let content = vec![ ContentBlockChunk::Thought(ThoughtChunk { id: "thought_1".to_string(), @@ -866,6 +875,12 @@ mod tests { id: "text_1".to_string(), text: "Response text".to_string(), }), + ContentBlockChunk::Unknown(UnknownChunk { + id: "unknown_1".to_string(), + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }), ContentBlockChunk::Thought(ThoughtChunk { id: "thought_1".to_string(), // Same ID, new chunk text: Some(" more thinking".to_string()), @@ -887,9 +902,9 @@ mod tests { ]; let mut tool_id_to_index = HashMap::new(); - let mut thought_id_to_index = HashMap::new(); - let (content_str, tool_calls, thoughts) = - process_chat_content_chunk(content, &mut tool_id_to_index, &mut thought_id_to_index); + let mut extra_id_to_index = HashMap::new(); + let (content_str, tool_calls, extra_content) = + process_chat_content_chunk(content, &mut tool_id_to_index, &mut extra_id_to_index); assert_eq!( content_str, @@ -897,55 +912,93 @@ mod tests { "Text content should be extracted" ); assert!(tool_calls.is_empty(), "No tool calls expected"); - assert_eq!(thoughts.len(), 3, "Should have three thought chunks"); - - // First thought chunk assert_eq!( - thoughts[0].index, 0, - "First thought should have index 0 (assigned based on thought_1)" - ); - assert_eq!(thoughts[0].chunk.id, "thought_1"); - assert_eq!(thoughts[0].chunk.text, Some("Let me think...".to_string())); - assert_eq!(thoughts[0].chunk.signature, Some("sig123".to_string())); - assert_eq!( - thoughts[0].chunk.provider_type, - Some("anthropic".to_string()) + extra_content.len(), + 4, + "Should have four extra content chunks" ); - // Second thought chunk (same thought ID, should have same index) - assert_eq!( - thoughts[1].index, 0, - "Second chunk of thought_1 should still have index 0" - ); - assert_eq!(thoughts[1].chunk.id, "thought_1"); - assert_eq!(thoughts[1].chunk.text, Some(" more thinking".to_string())); + // First: Thought chunk + match &extra_content[0] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 0, "First thought should have insert_index 0"); + assert_eq!(chunk.id, "thought_1"); + assert_eq!(chunk.text, Some("Let me think...".to_string())); + assert_eq!(chunk.signature, Some("sig123".to_string())); + assert_eq!(chunk.provider_type, Some("anthropic".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 0"), + } - // Third thought chunk (new thought ID, should have new index) - assert_eq!(thoughts[2].index, 1, "New thought_2 should have index 1"); - assert_eq!(thoughts[2].chunk.id, "thought_2"); - assert_eq!( - thoughts[2].chunk.text, - Some("Different thought".to_string()) - ); + // Second: Unknown chunk + match &extra_content[1] { + ExtraContentBlockChunk::Unknown { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 1, "Unknown should have insert_index 1"); + assert_eq!(chunk.id, "unknown_1"); + assert_eq!(chunk.data, serde_json::json!({"custom": "data"})); + assert_eq!(chunk.model_name, Some("test_model".to_string())); + } + ExtraContentBlockChunk::Thought { .. 
} => panic!("Expected Unknown at position 1"), + } - // Verify the hashmap tracked the thought IDs correctly + // Third: Second Thought chunk (same ID, same insert_index) + match &extra_content[2] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!( + *insert_index, 0, + "Second chunk of thought_1 should still have insert_index 0" + ); + assert_eq!(chunk.id, "thought_1"); + assert_eq!(chunk.text, Some(" more thinking".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 2"), + } + + // Fourth: New Thought chunk (new ID, new insert_index) + match &extra_content[3] { + ExtraContentBlockChunk::Thought { + insert_index, + chunk, + } => { + assert_eq!(*insert_index, 2, "New thought_2 should have insert_index 2"); + assert_eq!(chunk.id, "thought_2"); + assert_eq!(chunk.text, Some("Different thought".to_string())); + } + ExtraContentBlockChunk::Unknown { .. } => panic!("Expected Thought at position 3"), + } + + // Verify the hashmap tracked the IDs correctly assert_eq!( - thought_id_to_index.get("thought_1"), + extra_id_to_index.get("thought_1"), Some(&0), "thought_1 should be mapped to index 0" ); assert_eq!( - thought_id_to_index.get("thought_2"), + extra_id_to_index.get("unknown_1"), Some(&1), - "thought_2 should be mapped to index 1" + "unknown_1 should be mapped to index 1" + ); + assert_eq!( + extra_id_to_index.get("thought_2"), + Some(&2), + "thought_2 should be mapped to index 2" ); } #[test] - fn test_openai_compatible_thought_chunk_serialization() { - // Test that serde(flatten) works correctly for streaming thought chunks - let chunk = OpenAICompatibleThoughtChunk { - index: 2, + fn test_extra_content_block_chunk_serialization() { + // Test Thought variant serialization + let thought_chunk = ExtraContentBlockChunk::Thought { + insert_index: 2, chunk: ThoughtChunk { id: "chunk_id".to_string(), text: Some("Thinking...".to_string()), @@ -956,15 +1009,34 @@ mod tests { extra_data: None, }, }; - let json = serde_json::to_value(&chunk).unwrap(); + let json = serde_json::to_value(&thought_chunk).unwrap(); - // With flatten, all fields should be at the top level - assert_eq!(json["index"], 2); + // With tagged enum + flatten, type tag and flattened fields at top level + assert_eq!(json["type"], "thought"); + assert_eq!(json["insert_index"], 2); assert_eq!(json["id"], "chunk_id"); assert_eq!(json["text"], "Thinking..."); assert_eq!(json["signature"], "sig_chunk"); assert_eq!(json["summary_id"], "summary_1"); assert_eq!(json["summary_text"], "Summary text"); assert_eq!(json["provider_type"], "anthropic"); + + // Test Unknown variant serialization + let unknown_chunk = ExtraContentBlockChunk::Unknown { + insert_index: 5, + chunk: UnknownChunk { + id: "unknown_id".to_string(), + data: serde_json::json!({"custom": "data"}), + model_name: Some("test_model".to_string()), + provider_name: None, + }, + }; + let json = serde_json::to_value(&unknown_chunk).unwrap(); + + assert_eq!(json["type"], "unknown"); + assert_eq!(json["insert_index"], 5); + assert_eq!(json["id"], "unknown_id"); + assert_eq!(json["data"], serde_json::json!({"custom": "data"})); + assert_eq!(json["model_name"], "test_model"); } }
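
For reviewers: the index-assignment idiom in process_chat_content_chunk above boils down to the HashMap entry API, where the first chunk with a given ID claims the next free index and every later chunk with the same ID reuses it. A minimal standalone sketch of that idiom (the helper name stable_index is hypothetical, not part of this patch):

use std::collections::HashMap;

// Hypothetical helper mirroring the idiom in the patch: `len` is captured
// before the `entry` call, so the first occurrence of an ID claims the next
// free slot and repeats of the same ID map to the same index.
fn stable_index(map: &mut HashMap<String, usize>, id: &str) -> usize {
    let len = map.len();
    *map.entry(id.to_string()).or_insert(len)
}

fn main() {
    let mut extra_id_to_index = HashMap::new();
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_1"), 0);
    assert_eq!(stable_index(&mut extra_id_to_index, "unknown_1"), 1);
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_1"), 0); // same ID, same index
    assert_eq!(stable_index(&mut extra_id_to_index, "thought_2"), 2);
}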
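
The serialization test above asserts a type tag, an insert_index field, and the chunk's own fields all at the top level of the JSON object. The actual ExtraContentBlockChunk declaration lives elsewhere in this patch; the sketch below (with a simplified stand-in chunk struct that omits fields like signature and summary_id) is only a guess at the serde attributes that would produce that shape:

use serde::Serialize;

// Simplified stand-in for the real ThoughtChunk; the real type carries more
// fields (signature, summary_id, summary_text, provider_type, ...).
#[derive(Serialize)]
struct ThoughtChunkSketch {
    id: String,
    text: Option<String>,
}

// Internally tagged enum: `type` serializes as "thought", and
// #[serde(flatten)] hoists the chunk's fields up next to insert_index.
#[derive(Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum ExtraContentBlockChunkSketch {
    Thought {
        insert_index: usize,
        #[serde(flatten)]
        chunk: ThoughtChunkSketch,
    },
}

fn main() {
    let chunk = ExtraContentBlockChunkSketch::Thought {
        insert_index: 2,
        chunk: ThoughtChunkSketch {
            id: "chunk_id".to_string(),
            text: Some("Thinking...".to_string()),
        },
    };
    // Prints: {"type":"thought","insert_index":2,"id":"chunk_id","text":"Thinking..."}
    println!("{}", serde_json::to_string(&chunk).unwrap());
}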
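
On the consuming side, insert_index tells a client where each extra block sits relative to the others, while the chunk ID groups the deltas that belong to the same block, so streamed reasoning content can be folded back into ordered blocks. A hypothetical client-side helper (not part of this patch), exercised with the same data as the streaming test above:

use std::collections::BTreeMap;

// Hypothetical reassembly helper: fold streamed (insert_index, text) pairs
// back into ordered blocks by concatenating text per index.
fn reassemble(chunks: &[(usize, &str)]) -> Vec<String> {
    let mut by_index: BTreeMap<usize, String> = BTreeMap::new();
    for (insert_index, text) in chunks {
        by_index.entry(*insert_index).or_default().push_str(text);
    }
    by_index.into_values().collect()
}

fn main() {
    let streamed = [(0, "Let me think..."), (0, " more thinking"), (2, "Different thought")];
    assert_eq!(
        reassemble(&streamed),
        vec![
            "Let me think... more thinking".to_string(),
            "Different thought".to_string(),
        ]
    );
}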