From 97459ca433377b019ad5fd56a478e546b0648419 Mon Sep 17 00:00:00 2001 From: Heejae Kim Date: Fri, 3 Oct 2025 13:25:18 +0900 Subject: [PATCH 1/4] fix: ensure `sanitize_user_prompt` handles UTF-8 boundaries correctly and add related tests --- crates/chat-cli/src/cli/chat/cli/hooks.rs | 39 +++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/crates/chat-cli/src/cli/chat/cli/hooks.rs b/crates/chat-cli/src/cli/chat/cli/hooks.rs index 28207d0ac6..0db87fd732 100644 --- a/crates/chat-cli/src/cli/chat/cli/hooks.rs +++ b/crates/chat-cli/src/cli/chat/cli/hooks.rs @@ -378,8 +378,16 @@ impl HookExecutor { /// Sanitizes a string value to be used as an environment variable fn sanitize_user_prompt(input: &str) -> String { - // Limit the size of input to first 4096 characters - let truncated = if input.len() > 4096 { &input[0..4096] } else { input }; + // Limit the size of input to first 4096 bytes, respecting character boundaries + let truncated = if input.len() > 4096 { + let mut end = 4096; + while end > 0 && !input.is_char_boundary(end) { + end -= 1; + } + &input[0..end] + } else { + input + }; // Remove any potentially problematic characters truncated.replace(|c: char| c.is_control() && c != '\n' && c != '\r' && c != '\t', "") @@ -750,4 +758,31 @@ mod tests { assert_eq!(*exit_code, 0); assert!(hook_output.contains("Turn completed successfully")); } + + #[test] + fn test_sanitize_user_prompt_cjk_characters() { + // Test with CJK characters that would cause panic with naive byte slicing + let korean_text = "한".repeat(2000); // Each Korean character is 3 bytes in UTF-8 + let result = sanitize_user_prompt(&korean_text); + + // Should not panic and should be truncated safely + assert!(result.len() <= 4096); + assert!(!result.is_empty()); + + // Test with mixed ASCII and CJK at boundary + let mixed_text = "a".repeat(4094) + "한국어"; // 4094 + 9 bytes = 4103 bytes + let result = sanitize_user_prompt(&mixed_text); + assert!(result.len() <= 4096); + assert!(result.ends_with("a")); // Should end with ASCII, not partial CJK + + // Test with text shorter than limit + let short_text = "안녕하세요"; + let result = sanitize_user_prompt(short_text); + assert_eq!(result, short_text); + + // Test with control characters + let text_with_controls = "Hello\x00World\nTest\r\tEnd"; + let result = sanitize_user_prompt(text_with_controls); + assert_eq!(result, "HelloWorld\nTest\r\tEnd"); + } } From db4362cdce094dd60a27913e4d06bea687ed5d1d Mon Sep 17 00:00:00 2001 From: Heejae Kim Date: Fri, 3 Oct 2025 13:46:42 +0900 Subject: [PATCH 2/4] refactor: change the code into a more concise form. --- crates/chat-cli/src/cli/chat/cli/hooks.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/chat-cli/src/cli/chat/cli/hooks.rs b/crates/chat-cli/src/cli/chat/cli/hooks.rs index 0db87fd732..8a2258db23 100644 --- a/crates/chat-cli/src/cli/chat/cli/hooks.rs +++ b/crates/chat-cli/src/cli/chat/cli/hooks.rs @@ -379,12 +379,11 @@ impl HookExecutor { /// Sanitizes a string value to be used as an environment variable fn sanitize_user_prompt(input: &str) -> String { // Limit the size of input to first 4096 bytes, respecting character boundaries - let truncated = if input.len() > 4096 { - let mut end = 4096; - while end > 0 && !input.is_char_boundary(end) { - end -= 1; - } - &input[0..end] + const MAX_LEN: usize = 4096; + + let truncated = if input.len() > MAX_LEN { + let end = (0..=MAX_LEN).rev().find(|&i| input.is_char_boundary(i)).unwrap_or(0); + &input[..end] } else { input }; From 92e71ea584c9d37471d18ac6f4d6990c71adc943 Mon Sep 17 00:00:00 2001 From: Heejae Kim Date: Sat, 4 Oct 2025 07:15:05 +0900 Subject: [PATCH 3/4] refactor: simplify string truncation logic by using `truncate_safe` function --- crates/chat-cli/src/cli/chat/cli/hooks.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/chat-cli/src/cli/chat/cli/hooks.rs b/crates/chat-cli/src/cli/chat/cli/hooks.rs index 8a2258db23..54c5d8560e 100644 --- a/crates/chat-cli/src/cli/chat/cli/hooks.rs +++ b/crates/chat-cli/src/cli/chat/cli/hooks.rs @@ -382,8 +382,7 @@ fn sanitize_user_prompt(input: &str) -> String { const MAX_LEN: usize = 4096; let truncated = if input.len() > MAX_LEN { - let end = (0..=MAX_LEN).rev().find(|&i| input.is_char_boundary(i)).unwrap_or(0); - &input[..end] + truncate_safe(input, MAX_LEN) } else { input }; From 4cea4898e62d6edfb36b1dfb2a9ccb2481ce667f Mon Sep 17 00:00:00 2001 From: Heejae Kim Date: Sat, 4 Oct 2025 07:27:02 +0900 Subject: [PATCH 4/4] refactor: Simplify `sanitize_user_prompt` by removing redundant length check. --- crates/chat-cli/src/cli/chat/cli/hooks.rs | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/crates/chat-cli/src/cli/chat/cli/hooks.rs b/crates/chat-cli/src/cli/chat/cli/hooks.rs index 54c5d8560e..507f749760 100644 --- a/crates/chat-cli/src/cli/chat/cli/hooks.rs +++ b/crates/chat-cli/src/cli/chat/cli/hooks.rs @@ -378,14 +378,7 @@ impl HookExecutor { /// Sanitizes a string value to be used as an environment variable fn sanitize_user_prompt(input: &str) -> String { - // Limit the size of input to first 4096 bytes, respecting character boundaries - const MAX_LEN: usize = 4096; - - let truncated = if input.len() > MAX_LEN { - truncate_safe(input, MAX_LEN) - } else { - input - }; + let truncated = truncate_safe(input, 4096); // Remove any potentially problematic characters truncated.replace(|c: char| c.is_control() && c != '\n' && c != '\r' && c != '\t', "")