39 changes: 0 additions & 39 deletions src/models/chat.rs
@@ -43,45 +43,6 @@ impl ReasoningConfig {

Ok(())
}

// For OpenAI/Azure - Direct passthrough (but prioritize max_tokens over effort)
pub fn to_openai_effort(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
self.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
}

// For Vertex AI (Gemini) - Use max_tokens directly
pub fn to_gemini_thinking_budget(&self) -> Option<i32> {
self.max_tokens.map(|tokens| tokens as i32)
}

// For Anthropic/Bedrock - Custom prompt generation (prioritize max_tokens over effort)
pub fn to_thinking_prompt(&self) -> Option<String> {
if self.max_tokens.is_some() {
// If max_tokens is specified, use a generic thinking prompt
Some("Think through this step-by-step with detailed reasoning.".to_string())
} else {
match self.effort.as_deref() {
Some(effort) if !effort.trim().is_empty() => match effort {
"high" => {
Some("Think through this step-by-step with detailed reasoning.".to_string())
}
"medium" => Some("Consider this problem thoughtfully.".to_string()),
"low" => Some("Think about this briefly.".to_string()),
_ => None,
},
_ => None,
}
}
}
}

#[derive(Deserialize, Serialize, Clone, ToSchema)]
12 changes: 1 addition & 11 deletions src/providers/anthropic/models.rs
@@ -90,7 +90,7 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
))
);

let mut system = request
let system = request
.messages
.iter()
.find(|msg| msg.role == "system")
@@ -103,16 +103,6 @@ impl From<ChatCompletionRequest> for AnthropicChatCompletionRequest {
_ => None,
});

// Add reasoning prompt if reasoning is requested
if let Some(reasoning_config) = &request.reasoning {
if let Some(thinking_prompt) = reasoning_config.to_thinking_prompt() {
system = Some(match system {
Some(existing) => format!("{}\n\n{}", existing, thinking_prompt),
None => thinking_prompt,
});
}
}

let messages: Vec<ChatCompletionMessage> = request
.messages
.into_iter()
23 changes: 1 addition & 22 deletions src/providers/anthropic/provider.rs
@@ -1,7 +1,6 @@
use async_trait::async_trait;
use axum::http::StatusCode;
use reqwest::Client;
use tracing::info;

use super::models::{AnthropicChatCompletionRequest, AnthropicChatCompletionResponse};
use crate::config::models::{ModelConfig, Provider as ProviderConfig};
@@ -42,29 +41,9 @@ impl Provider for AnthropicProvider {
) -> Result<ChatCompletionResponse, StatusCode> {
// Validate reasoning config if present
if let Some(reasoning) = &payload.reasoning {
if let Err(e) = reasoning.validate() {
tracing::error!("Invalid reasoning config: {}", e);
if let Err(_e) = reasoning.validate() {
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Anthropic reasoning enabled with max_tokens: {}",
max_tokens
);
} else if let Some(thinking_prompt) = reasoning.to_thinking_prompt() {
info!(
"✅ Anthropic reasoning enabled with effort level: {:?} -> prompt: \"{}\"",
reasoning.effort,
thinking_prompt.chars().take(50).collect::<String>() + "..."
);
} else {
tracing::debug!(
"ℹ️ Anthropic reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let request = AnthropicChatCompletionRequest::from(payload);
36 changes: 15 additions & 21 deletions src/providers/azure/provider.rs
@@ -24,7 +24,20 @@ struct AzureChatCompletionRequest {

impl From<ChatCompletionRequest> for AzureChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());
// Handle Azure reasoning effort logic inline (same as OpenAI)
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for Azure
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Remove reasoning field from base request since Azure uses reasoning_effort
base.reasoning = None;
@@ -81,28 +94,9 @@ impl Provider for AzureProvider {
) -> Result<ChatCompletionResponse, StatusCode> {
// Validate reasoning config if present
if let Some(reasoning) = &payload.reasoning {
if let Err(e) = reasoning.validate() {
tracing::error!("Invalid reasoning config: {}", e);
if let Err(_e) = reasoning.validate() {
return Err(StatusCode::BAD_REQUEST);
}

if let Some(max_tokens) = reasoning.max_tokens {
info!(
"✅ Azure reasoning with max_tokens: {} (note: Azure uses effort levels, max_tokens ignored)",
max_tokens
);
} else if let Some(effort) = reasoning.to_openai_effort() {
info!(
"✅ Azure reasoning enabled with effort level: \"{}\"",
effort
);
} else {
tracing::debug!(
"ℹ️ Azure reasoning config present but no valid parameters (effort: {:?}, max_tokens: {:?})",
reasoning.effort,
reasoning.max_tokens
);
}
}

let deployment = model_config.params.get("deployment").unwrap();
91 changes: 47 additions & 44 deletions src/providers/bedrock/test.rs
@@ -708,42 +708,56 @@ mod arn_tests {

#[test]
fn test_reasoning_config_to_thinking_prompt() {
// Test effort-based prompts
let high_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
};
assert!(high_effort_config.to_thinking_prompt().is_some());

let medium_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("medium".to_string()),
max_tokens: None,
exclude: None,
};
assert!(medium_effort_config.to_thinking_prompt().is_some());
use crate::models::chat::ChatCompletionRequest;
use crate::models::content::ChatCompletionMessage;
use crate::providers::anthropic::AnthropicChatCompletionRequest;

let low_effort_config = crate::models::chat::ReasoningConfig {
effort: Some("low".to_string()),
// Test that reasoning config no longer adds prompts to system message
let high_effort_request = ChatCompletionRequest {
model: "test".to_string(),
messages: vec![ChatCompletionMessage {
role: "user".to_string(),
content: Some(crate::models::content::ChatMessageContent::String(
"test".to_string(),
)),
name: None,
tool_calls: None,
refusal: None,
}],
reasoning: Some(crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: None,
exclude: None,
}),
temperature: None,
top_p: None,
n: None,
stream: None,
stop: None,
max_tokens: None,
exclude: None,
max_completion_tokens: None,
parallel_tool_calls: None,
presence_penalty: None,
frequency_penalty: None,
logit_bias: None,
tool_choice: None,
tools: None,
user: None,
logprobs: None,
top_logprobs: None,
response_format: None,
};
assert!(low_effort_config.to_thinking_prompt().is_some());

// Test max_tokens takes priority over effort
let max_tokens_config = crate::models::chat::ReasoningConfig {
effort: Some("high".to_string()),
max_tokens: Some(1000),
exclude: None,
};
assert!(max_tokens_config.to_thinking_prompt().is_some());
let anthropic_request = AnthropicChatCompletionRequest::from(high_effort_request);
// System should be None since no system message was provided and reasoning logic removed
assert!(anthropic_request.system.is_none());
}

#[tokio::test]
async fn test_anthropic_reasoning_prompt_transformation() {
use crate::providers::anthropic::AnthropicChatCompletionRequest;

// Test that reasoning config transforms into system prompt for Anthropic
// Test that reasoning config no longer transforms into system prompt for Anthropic
let payload = ChatCompletionRequest {
model: "anthropic.claude-3-5-sonnet-20241022-v2:0".to_string(),
messages: vec![ChatCompletionMessage {
@@ -780,16 +794,10 @@
// Transform the request to Anthropic format
let anthropic_request = AnthropicChatCompletionRequest::from(payload);

// Verify reasoning prompt is included in system message
assert!(
anthropic_request.system.is_some(),
"System message should be present for reasoning"
);
let system_message = anthropic_request.system.unwrap();
// Verify reasoning prompt is no longer included in system message
assert!(
system_message.contains("Think through this step-by-step with detailed reasoning"),
"System message should contain reasoning prompt: {}",
system_message
anthropic_request.system.is_none(),
"System message should not be present since reasoning logic was removed"
);
}

@@ -844,20 +852,15 @@

let anthropic_request = AnthropicChatCompletionRequest::from(payload);

// Verify both original system message and reasoning prompt are present
// Verify original system message is preserved (reasoning logic removed)
assert!(
anthropic_request.system.is_some(),
"System message should be present"
);
let system_message = anthropic_request.system.unwrap();
assert!(
system_message.contains("You are a helpful assistant"),
"Should preserve original system message: {}",
system_message
);
assert!(
system_message.contains("Consider this problem thoughtfully"),
"Should append reasoning prompt: {}",
assert_eq!(
system_message, "You are a helpful assistant.",
"Should only contain original system message: {}",
system_message
);
}
30 changes: 19 additions & 11 deletions src/providers/openai/provider.rs
@@ -23,16 +23,25 @@ struct OpenAIChatCompletionRequest {

impl From<ChatCompletionRequest> for OpenAIChatCompletionRequest {
fn from(mut base: ChatCompletionRequest) -> Self {
let reasoning_effort = base.reasoning.as_ref().and_then(|r| r.to_openai_effort());

// Handle max_completion_tokens logic - use max_completion_tokens if provided and > 0,
// otherwise fall back to max_tokens
base.max_completion_tokens = match (base.max_completion_tokens, base.max_tokens) {
(Some(v), _) if v > 0 => Some(v),
(_, Some(v)) if v > 0 => Some(v),
_ => None,
};
// Handle OpenAI reasoning effort logic inline
let reasoning_effort = base.reasoning.as_ref().and_then(|reasoning| {
if reasoning.max_tokens.is_some() {
// If max_tokens is specified, don't use effort for OpenAI
None
} else {
// Only return effort if it's not empty
reasoning
.effort
.as_ref()
.filter(|e| !e.trim().is_empty())
.cloned()
}
});

// Convert max_tokens to max_completion_tokens if present
if base.max_tokens.is_some() && base.max_completion_tokens.is_none() {
base.max_completion_tokens = base.max_tokens;
}
base.max_tokens = None;

// Remove reasoning field from base request since OpenAI uses reasoning_effort
@@ -84,8 +93,7 @@ impl Provider for OpenAIProvider {
) -> Result<ChatCompletionResponse, StatusCode> {
// Validate reasoning config if present
if let Some(reasoning) = &payload.reasoning {
if let Err(e) = reasoning.validate() {
tracing::error!("Invalid reasoning config: {}", e);
if let Err(_e) = reasoning.validate() {
return Err(StatusCode::BAD_REQUEST);
}
}
27 changes: 6 additions & 21 deletions src/providers/vertexai/models.rs
@@ -333,10 +333,6 @@ impl GeminiSchema {

impl From<ChatCompletionRequest> for GeminiChatRequest {
fn from(req: ChatCompletionRequest) -> Self {
tracing::debug!(
"🔄 Converting ChatCompletionRequest to GeminiChatRequest, reasoning: {:?}",
req.reasoning
);
let system_instruction = req
.messages
.iter()
@@ -415,18 +411,13 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
.reasoning
.as_ref()
.and_then(|r| {
tracing::debug!("📝 Processing reasoning config for thinkingConfig: {:?}", r);
r.to_gemini_thinking_budget()
// Handle Gemini thinking budget logic inline
r.max_tokens.map(|tokens| tokens as i32)
})
.map(|budget| {
tracing::debug!("🎛️ Creating ThinkingConfig with budget: {} tokens", budget);
ThinkingConfig {
thinking_budget: Some(budget),
}
.map(|budget| ThinkingConfig {
thinking_budget: Some(budget),
});

tracing::debug!("🔧 Final thinking_config: {:?}", thinking_config);

let generation_config = Some(GenerationConfig {
temperature: req.temperature,
top_p: req.top_p,
@@ -461,20 +452,14 @@ impl From<ChatCompletionRequest> for GeminiChatRequest {
_ => GeminiToolChoice::None,
});

let result = Self {
Self {
contents,
generation_config,
safety_settings: None,
tools,
tool_choice,
system_instruction,
};

tracing::debug!(
"📦 Created GeminiChatRequest with generation_config: {:?}",
result.generation_config
);
result
}
}
}
