diff --git a/crates/chat-cli/src/cli/chat/cli/hooks.rs b/crates/chat-cli/src/cli/chat/cli/hooks.rs
index 28207d0ac6..507f749760 100644
--- a/crates/chat-cli/src/cli/chat/cli/hooks.rs
+++ b/crates/chat-cli/src/cli/chat/cli/hooks.rs
@@ -378,8 +378,8 @@ impl HookExecutor {
 
 /// Sanitizes a string value to be used as an environment variable
 fn sanitize_user_prompt(input: &str) -> String {
-    // Limit the size of input to first 4096 characters
-    let truncated = if input.len() > 4096 { &input[0..4096] } else { input };
+    // Limit the input to at most 4096 bytes without splitting a UTF-8 character
+    let truncated = truncate_safe(input, 4096);
 
     // Remove any potentially problematic characters
     truncated.replace(|c: char| c.is_control() && c != '\n' && c != '\r' && c != '\t', "")
@@ -750,4 +750,32 @@ mod tests {
         assert_eq!(*exit_code, 0);
         assert!(hook_output.contains("Turn completed successfully"));
     }
+
+    /// Regression test: sanitizing multi-byte (CJK) prompts must not panic or exceed 4096 bytes
+    #[test]
+    fn test_sanitize_user_prompt_cjk_characters() {
+        // Test with CJK characters that would cause panic with naive byte slicing
+        let korean_text = "한".repeat(2000); // Each Korean character is 3 bytes in UTF-8
+        let result = sanitize_user_prompt(&korean_text);
+
+        // Should not panic and should be truncated safely
+        assert!(result.len() <= 4096);
+        assert!(!result.is_empty());
+
+        // Test with mixed ASCII and CJK at boundary
+        let mixed_text = "a".repeat(4094) + "한국어"; // 4094 + 9 bytes = 4103 bytes
+        let result = sanitize_user_prompt(&mixed_text);
+        assert!(result.len() <= 4096);
+        assert!(result.ends_with('a')); // Should end with ASCII, not partial CJK
+
+        // Test with text shorter than limit
+        let short_text = "안녕하세요";
+        let result = sanitize_user_prompt(short_text);
+        assert_eq!(result, short_text);
+
+        // Test with control characters
+        let text_with_controls = "Hello\x00World\nTest\r\tEnd";
+        let result = sanitize_user_prompt(text_with_controls);
+        assert_eq!(result, "HelloWorld\nTest\r\tEnd");
+    }
 }