diff --git a/src/models/chat.rs b/src/models/chat.rs
index 6e0b767..63f4254 100644
--- a/src/models/chat.rs
+++ b/src/models/chat.rs
@@ -43,45 +43,6 @@ impl ReasoningConfig {
         Ok(())
     }
-
-    // For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
-    pub fn to_openai_effort(&self) -> Option<String> {
-        if self.max_tokens.is_some() {
-            // If max_tokens is specified, don't use effort for OpenAI
-            None
-        } else {
-            // Only return effort if it's not empty
-            self.effort
-                .as_ref()
-                .filter(|e| !e.trim().is_empty())
-                .cloned()
-        }
-    }
-
-    // For Vertex AI (Gemini) - Use max_tokens directly
-    pub fn to_gemini_thinking_budget(&self) -> Option<i32> {
-        self.max_tokens.map(|tokens| tokens as i32)
-    }
-
-    // For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
-    pub fn to_thinking_prompt(&self) -> Option<String> {
-        if self.max_tokens.is_some() {
-            // If max_tokens is specified, use a generic thinking prompt
-            Some("Think through this step-by-step with detailed reasoning.".to_string())
-        } else {
-            match self.effort.as_deref() {
-                Some(effort) if !effort.trim().is_empty() => match effort {
-                    "high" => {
-                        Some("Think through this step-by-step with detailed reasoning.".to_string())
-                    }
-                    "medium" => Some("Consider this problem thoughtfully.".to_string()),
-                    "low" => Some("Think about this briefly.".to_string()),
-                    _ => None,
-                },
-                _ => None,
-            }
-        }
-    }
 }
 
 #[derive(Deserialize, Serialize, Clone, ToSchema)]
diff --git a/src/providers/anthropic/models.rs b/src/providers/anthropic/models.rs
index 3828562..8d71742 100644
--- a/src/providers/anthropic/models.rs
+++ b/src/providers/anthropic/models.rs
@@ -90,7 +90,7 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
             ))
         );
 
-        let mut system = request
+        let system = request
             .messages
             .iter()
             .find(|msg| msg.role == "system")
@@ -103,16 +103,6 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
                 _ => None,
             });
 
-        // Add reasoning prompt if reasoning is requested
-        if let Some(reasoning_config) = &request.reasoning {
-            if let Some(thinking_prompt) = reasoning_config.to_thinking_prompt() {
-                system = Some(match system {
-                    Some(existing) => format!("{}\n\n{}", existing, thinking_prompt),
-                    None => thinking_prompt,
-                });
-            }
-        }
-
         let messages: Vec = request
             .messages
             .into_iter()
diff --git a/src/providers/anthropic/provider.rs b/src/providers/anthropic/provider.rs
index 0555999..6f06804 100644
--- a/src/providers/anthropic/provider.rs
+++ b/src/providers/anthropic/provider.rs
@@ -1,7 +1,6 @@
 use async_trait::async_trait;
 use axum::http::StatusCode;
 use reqwest::Client;
-use tracing::info;
 
 use super::models::{AnthropicChatCompletionRequest, AnthropicChatCompletionResponse};
 use crate::config::models::{ModelConfig, Provider as ProviderConfig};
@@ -42,29 +41,9 @@ impl Provider for AnthropicProvider {
     ) -> Result<ChatCompletionResponse, StatusCode> {
         // Validate reasoning config if present
         if let Some(reasoning) = &payload.reasoning {
-            if let Err(e) = reasoning.validate() {
-                tracing::error!("Invalid reasoning config: {}", e);
+            if let Err(_e) = reasoning.validate() {
                 return Err(StatusCode::BAD_REQUEST);
             }
-
-            if let Some(max_tokens) = reasoning.max_tokens {
-                info!(
-                    "✅ Anthropic reasoning enabled with max_tokens: {}",
-                    max_tokens
-                );
-            } else if let Some(thinking_prompt) = reasoning.to_thinking_prompt() {
-                info!(
-                    "✅ Anthropic reasoning enabled with effort level: {:?} -> prompt: \"{}\"",
-                    reasoning.effort,
-                    thinking_prompt.chars().take(50).collect::<String>() + "..."
-                );
-            } else {
-                tracing::debug!(
-                    "ℹ️ Anthropic reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
-                    reasoning.effort,
-                    reasoning.max_tokens
-                );
-            }
         }
 
         let request = AnthropicChatCompletionRequest::from(payload);
diff --git a/src/providers/azure/provider.rs b/src/providers/azure/provider.rs
index 0e81af4..feaea72 100644
--- a/src/providers/azure/provider.rs
+++ b/src/providers/azure/provider.rs
@@ -24,7 +24,20 @@ struct AzureChatCompletionRequest {
 
 impl From<ChatCompletionRequest> for AzureChatCompletionRequest {
     fn from(mut base: ChatCompletionRequest) -> Self {
-        let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
+        // Handle Azure reasoning effort logic inline (same as OpenAI)
+        let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
+            if reasoning.max_tokens.is_some() {
+                // If max_tokens is specified, don't use effort for Azure
+                None
+            } else {
+                // Only return effort if it's not empty
+                reasoning
+                    .effort
+                    .as_ref()
+                    .filter(|e| !e.trim().is_empty())
+                    .cloned()
+            }
+        });
 
         // Remove reasoning field from base request since Azure uses reasoning_effort
         base.reasoning = None;
@@ -81,28 +94,9 @@ impl Provider for AzureProvider {
     ) -> Result<ChatCompletionResponse, StatusCode> {
         // Validate reasoning config if present
         if let Some(reasoning) = &payload.reasoning {
-            if let Err(e) = reasoning.validate() {
-                tracing::error!("Invalid reasoning config: {}", e);
+            if let Err(_e) = reasoning.validate() {
                 return Err(StatusCode::BAD_REQUEST);
             }
-
-            if let Some(max_tokens) = reasoning.max_tokens {
-                info!(
-                    "✅ Azure reasoning with max_tokens: {} (note: Azure uses effort levels, max_tokens ignored)",
-                    max_tokens
-                );
-            } else if let Some(effort) = reasoning.to_openai_effort() {
-                info!(
-                    "✅ Azure reasoning enabled with effort level: \"{}\"",
-                    effort
-                );
-            } else {
-                tracing::debug!(
-                    "ℹ️ Azure reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
-                    reasoning.effort,
-                    reasoning.max_tokens
-                );
-            }
         }
 
         let deployment = model_config.params.get("deployment").unwrap();
diff --git a/src/providers/bedrock/test.rs b/src/providers/bedrock/test.rs
index 38966f4..4081f9a 100644
--- a/src/providers/bedrock/test.rs
+++ b/src/providers/bedrock/test.rs
@@ -708,42 +708,56 @@ mod arn_tests {
 
     #[test]
     fn test_reasoning_config_to_thinking_prompt() {
-        // Test effort-based prompts
-        let high_effort_config = crate::models::chat::ReasoningConfig {
-            effort: Some("high".to_string()),
-            max_tokens: None,
-            exclude: None,
-        };
-        assert!(high_effort_config.to_thinking_prompt().is_some());
-
-        let medium_effort_config = crate::models::chat::ReasoningConfig {
-            effort: Some("medium".to_string()),
-            max_tokens: None,
-            exclude: None,
-        };
-        assert!(medium_effort_config.to_thinking_prompt().is_some());
+        use crate::models::chat::ChatCompletionRequest;
+        use crate::models::content::ChatCompletionMessage;
+        use crate::providers::anthropic::AnthropicChatCompletionRequest;
 
-        let low_effort_config = crate::models::chat::ReasoningConfig {
-            effort: Some("low".to_string()),
+        // Test that reasoning config no longer adds prompts to system message
+        let high_effort_request = ChatCompletionRequest {
+            model: "test".to_string(),
+            messages: vec![ChatCompletionMessage {
+                role: "user".to_string(),
+                content: Some(crate::models::content::ChatMessageContent::String(
+                    "test".to_string(),
+                )),
+                name: None,
+                tool_calls: None,
+                refusal: None,
+            }],
+            reasoning: Some(crate::models::chat::ReasoningConfig {
+                effort: Some("high".to_string()),
+                max_tokens: None,
+                exclude: None,
+            }),
+            temperature: None,
+            top_p: None,
+            n: None,
+            stream: None,
+            stop: None,
             max_tokens: None,
-            exclude: None,
+            max_completion_tokens: None,
+            parallel_tool_calls: None,
+            presence_penalty: None,
+            frequency_penalty: None,
+            logit_bias: None,
+            tool_choice: None,
+            tools: None,
+            user: None,
+            logprobs: None,
+            top_logprobs: None,
+            response_format: None,
         };
-        assert!(low_effort_config.to_thinking_prompt().is_some());
 
-        // Test max_tokens takes priority over effort
-        let max_tokens_config = crate::models::chat::ReasoningConfig {
-            effort: Some("high".to_string()),
-            max_tokens: Some(1000),
-            exclude: None,
-        };
-        assert!(max_tokens_config.to_thinking_prompt().is_some());
+        let anthropic_request = AnthropicChatCompletionRequest::from(high_effort_request);
+        // System should be None since no system message was provided and reasoning logic removed
+        assert!(anthropic_request.system.is_none());
     }
 
     #[tokio::test]
     async fn test_anthropic_reasoning_prompt_transformation() {
         use crate::providers::anthropic::AnthropicChatCompletionRequest;
 
-        // Test that reasoning config transforms into system prompt for Anthropic
+        // Test that reasoning config no longer transforms into system prompt for Anthropic
         let payload = ChatCompletionRequest {
             model: "anthropic.claude-3-5-sonnet-20241022-v2:0".to_string(),
             messages: vec![ChatCompletionMessage {
@@ -780,16 +794,10 @@ mod arn_tests {
         // Transform the request to Anthropic format
         let anthropic_request = AnthropicChatCompletionRequest::from(payload);
 
-        // Verify reasoning prompt is included in system message
-        assert!(
-            anthropic_request.system.is_some(),
-            "System message should be present for reasoning"
-        );
-
-        let system_message = anthropic_request.system.unwrap();
+        // Verify reasoning prompt is no longer included in system message
         assert!(
-            system_message.contains("Think through this step-by-step with detailed reasoning"),
-            "System message should contain reasoning prompt: {}",
-            system_message
+            anthropic_request.system.is_none(),
+            "System message should not be present since reasoning logic was removed"
         );
     }
@@ -844,20 +852,15 @@ mod arn_tests {
         let anthropic_request = AnthropicChatCompletionRequest::from(payload);
 
-        // Verify both original system message and reasoning prompt are present
+        // Verify original system message is preserved (reasoning logic removed)
         assert!(
             anthropic_request.system.is_some(),
             "System message should be present"
         );
 
         let system_message = anthropic_request.system.unwrap();
-        assert!(
-            system_message.contains("You are a helpful assistant"),
-            "Should preserve original system message: {}",
-            system_message
-        );
-        assert!(
-            system_message.contains("Consider this problem thoughtfully"),
-            "Should append reasoning prompt: {}",
+        assert_eq!(
+            system_message, "You are a helpful assistant.",
+            "Should only contain original system message: {}",
             system_message
         );
     }
diff --git a/src/providers/openai/provider.rs b/src/providers/openai/provider.rs
index 52f611f..d8733eb 100644
--- a/src/providers/openai/provider.rs
+++ b/src/providers/openai/provider.rs
@@ -23,16 +23,25 @@ struct OpenAIChatCompletionRequest {
 
 impl From<ChatCompletionRequest> for OpenAIChatCompletionRequest {
     fn from(mut base: ChatCompletionRequest) -> Self {
-        let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
-
-        // Handle max_completion_tokens logic - use max_completion_tokens if provided and > 0,
-        // otherwise fall back to max_tokens
-        base.max_completion_tokens = match (base.max_completion_tokens, base.max_tokens) {
-            (Some(v), _) if v > 0 => Some(v),
-            (_, Some(v)) if v > 0 => Some(v),
-            _ => None,
-        };
+        // Handle OpenAI reasoning effort logic inline
+        let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
+            if reasoning.max_tokens.is_some() {
+                // If max_tokens is specified, don't use effort for OpenAI
+                None
+            } else {
+                // Only return effort if it's not empty
+                reasoning
+                    .effort
+                    .as_ref()
+                    .filter(|e| !e.trim().is_empty())
+                    .cloned()
+            }
+        });
+        // Convert max_tokens to max_completion_tokens if present
+        if base.max_tokens.is_some() && base.max_completion_tokens.is_none() {
+            base.max_completion_tokens = base.max_tokens;
+        }
         base.max_tokens = None;
 
         // Remove reasoning field from base request since OpenAI uses reasoning_effort
@@ -84,8 +93,7 @@ impl Provider for OpenAIProvider {
     ) -> Result<ChatCompletionResponse, StatusCode> {
         // Validate reasoning config if present
         if let Some(reasoning) = &payload.reasoning {
-            if let Err(e) = reasoning.validate() {
-                tracing::error!("Invalid reasoning config: {}", e);
+            if let Err(_e) = reasoning.validate() {
                 return Err(StatusCode::BAD_REQUEST);
             }
         }
diff --git a/src/providers/vertexai/models.rs b/src/providers/vertexai/models.rs
index 303e7af..2fa7256 100644
--- a/src/providers/vertexai/models.rs
+++ b/src/providers/vertexai/models.rs
@@ -333,10 +333,6 @@ impl GeminiSchema {
 
 impl From<ChatCompletionRequest> for GeminiChatRequest {
     fn from(req: ChatCompletionRequest) -> Self {
-        tracing::debug!(
-            "🔄 Converting ChatCompletionRequest to GeminiChatRequest, reasoning: {:?}",
-            req.reasoning
-        );
         let system_instruction = req
             .messages
             .iter()
@@ -415,18 +411,13 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
             .reasoning
             .as_ref()
             .and_then(|r| {
-                tracing::debug!("📝 Processing reasoning config for thinkingConfig: {:?}", r);
-                r.to_gemini_thinking_budget()
+                // Handle Gemini thinking budget logic inline
+                r.max_tokens.map(|tokens| tokens as i32)
             })
-            .map(|budget| {
-                tracing::debug!("🎛️ Creating ThinkingConfig with budget: {} tokens", budget);
-                ThinkingConfig {
-                    thinking_budget: Some(budget),
-                }
+            .map(|budget| ThinkingConfig {
+                thinking_budget: Some(budget),
             });
 
-        tracing::debug!("🔧 Final thinking_config: {:?}", thinking_config);
-
         let generation_config = Some(GenerationConfig {
             temperature: req.temperature,
             top_p: req.top_p,
@@ -461,20 +452,14 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
             _ => GeminiToolChoice::None,
         });
 
-        let result = Self {
+        Self {
             contents,
             generation_config,
             safety_settings: None,
             tools,
             tool_choice,
             system_instruction,
-        };
-
-        tracing::debug!(
-            "📦 Created GeminiChatRequest with generation_config: {:?}",
-            result.generation_config
-        );
-        result
+        }
     }
 }
diff --git a/src/providers/vertexai/provider.rs b/src/providers/vertexai/provider.rs
index d0ba08a..1b19b3d 100644
--- a/src/providers/vertexai/provider.rs
+++ b/src/providers/vertexai/provider.rs
@@ -145,27 +145,9 @@ impl Provider for VertexAIProvider {
 
         // Validate reasoning config if present
         if let Some(reasoning) = &payload.reasoning {
-            tracing::debug!("🧠 VertexAI processing reasoning config: {:?}", reasoning);
-
-            if let Err(e) = reasoning.validate() {
-                tracing::error!("❌ VertexAI reasoning validation failed: {}", e);
+            if let Err(_e) = reasoning.validate() {
                 return Err(StatusCode::BAD_REQUEST);
             }
-
-            if let Some(thinking_budget) = reasoning.to_gemini_thinking_budget() {
-                tracing::info!(
-                    "✅ VertexAI reasoning enabled with thinking_budget: {} tokens",
-                    thinking_budget
-                );
-            } else {
-                tracing::debug!(
-                    "ℹ️ VertexAI reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
-                    reasoning.effort,
-                    reasoning.max_tokens
-                );
-            }
-        } else {
-            tracing::debug!("ℹ️ VertexAI no reasoning config provided");
         }
 
         let auth_token = self.get_auth_token().await?;