11use std:: sync:: Arc ;
22
3- use minimad:: { Line , parse_text} ;
43use syntect:: easy:: HighlightLines ;
54use syntect:: highlighting:: ThemeSet ;
65use syntect:: parsing:: SyntaxSet ;
@@ -55,50 +54,58 @@ pub struct CodeBlockParser {
5554
5655impl CodeBlockParser {
5756 /// Extract code blocks from markdown content.
58- pub fn parse ( content : & str ) -> Self {
59- let text = parse_text ( content, minimad:: Options :: default ( ) . keep_code_fences ( true ) ) ;
57+ /// Supports both standard and indented code blocks (up to 3 spaces of
58+ /// indentation).
59+ pub fn new ( content : & str ) -> Self {
6060 let original_lines: Vec < & str > = content. lines ( ) . collect ( ) ;
6161 let mut blocks = Vec :: new ( ) ;
62-
6362 let mut result = String :: new ( ) ;
64- let mut orig_idx = 0 ;
65- let mut code_lines: Vec < & str > = Vec :: new ( ) ;
66- let mut lang = "" ;
6763 let mut in_code = false ;
64+ let mut code_lines: Vec < & str > = Vec :: new ( ) ;
65+ let mut lang = String :: new ( ) ;
6866
69- for line in & text. lines {
70- match line {
71- Line :: CodeFence ( c) if !in_code => {
72- lang = c. compounds . first ( ) . map ( |c| c. src ) . unwrap_or ( "" ) ;
67+ for line in & original_lines {
68+ // Check if line is a code fence (with or without indentation)
69+ if let Some ( fence_lang) = Self :: detect_code_fence ( line) {
70+ if !in_code {
71+ // Opening fence
72+ lang = fence_lang;
7373 in_code = true ;
74- orig_idx += 1 ;
75- }
76- Line :: CodeFence ( _) => {
74+ } else {
75+ // Closing fence
7776 result. push_str ( & format ! ( "\x00 {}\x00 \n " , blocks. len( ) ) ) ;
78- blocks. push ( CodeBlock { code : code_lines. join ( "\n " ) , lang : lang. to_string ( ) } ) ;
77+ blocks. push ( CodeBlock { code : code_lines. join ( "\n " ) , lang : lang. clone ( ) } ) ;
7978 code_lines. clear ( ) ;
8079 in_code = false ;
81- orig_idx += 1 ;
82- }
83- _ if in_code => {
84- if orig_idx < original_lines. len ( ) {
85- code_lines. push ( original_lines[ orig_idx] ) ;
86- }
87- orig_idx += 1 ;
88- }
89- _ => {
90- if orig_idx < original_lines. len ( ) {
91- result. push_str ( original_lines[ orig_idx] ) ;
92- result. push ( '\n' ) ;
93- }
94- orig_idx += 1 ;
9580 }
81+ } else if in_code {
82+ // Inside code block - collect lines
83+ code_lines. push ( line) ;
84+ } else {
85+ // Regular markdown line
86+ result. push_str ( line) ;
87+ result. push ( '\n' ) ;
9688 }
9789 }
9890
9991 Self { markdown : result, blocks }
10092 }
10193
/// Detect whether a line is a backtick code fence marker (```).
///
/// Leading whitespace of any width is ignored, so fences indented inside
/// list items are recognised as well. A fence may consist of three or more
/// backticks; the extra backticks are not part of the language tag.
///
/// Returns:
/// - `Some(language)` for a fence followed by a language tag (the first
///   whitespace-delimited word after the backticks),
/// - `Some("")` for a bare fence without a tag (opening or closing),
/// - `None` when the line is not a fence. This includes lines where another
///   backtick follows the fence (for example an inline code span such as
///   "```code``` text"), because a backtick fence's info string may not
///   contain backticks.
fn detect_code_fence(line: &str) -> Option<String> {
    let after_fence = line
        .trim_start()
        .strip_prefix("```")?
        // Fences longer than three backticks are still fences.
        .trim_start_matches('`');

    // Any further backtick means this is inline code, not a fence line.
    if after_fence.contains('`') {
        return None;
    }

    let lang = after_fence.split_whitespace().next().unwrap_or_default();
    Some(lang.to_owned())
}
108+
102109 /// Get the processed markdown with placeholders.
103110 pub fn markdown ( & self ) -> & str {
104111 & self . markdown
@@ -122,64 +129,138 @@ impl CodeBlockParser {
122129
#[cfg(test)]
mod tests {
    //! Unit tests for code block extraction and restoration.
    //!
    //! Besides block counts and language tags, the tests also check the
    //! processed markdown (plain text kept, placeholder emitted), since that
    //! string is what gets handed to `restore`.

    use pretty_assertions::assert_eq;

    use super::*;

    /// Remove ANSI escape sequences so assertions can match plain text.
    fn strip_ansi(s: &str) -> String {
        strip_ansi_escapes::strip_str(s).to_string()
    }

    /// Build a parser from one of the markdown fixtures bundled with the tests.
    fn fixture_parser(name: &str) -> CodeBlockParser {
        let content = match name {
            "code-01" => include_str!("fixtures/code-01.md"),
            "code-02" => include_str!("fixtures/code-02.md"),
            _ => panic!("Unknown fixture: {}", name),
        };
        CodeBlockParser::new(content)
    }

    #[test]
    fn test_no_code_blocks() {
        let fixture = "Hello world\nThis is plain text.";
        let parser = CodeBlockParser::new(fixture);

        let actual = parser.blocks().len();
        let expected = 0;

        assert_eq!(actual, expected);
        // Plain text must pass through to the processed markdown.
        assert!(parser.markdown().contains("Hello world"));
        assert!(parser.markdown().contains("This is plain text."));
    }

    #[test]
    fn test_single_code_block() {
        let fixture = "```rust\nfn main() {}\n```";
        let parser = CodeBlockParser::new(fixture);

        let actual = parser.blocks().len();
        let expected = 1;

        assert_eq!(actual, expected);
        assert_eq!(parser.blocks()[0].lang, "rust");
        assert_eq!(parser.blocks()[0].code, "fn main() {}");
        // The block is replaced by an indexed placeholder in the markdown.
        assert!(parser.markdown().contains("\x000\x00"));
    }

    #[test]
    fn test_preserves_indentation_inside_code_block() {
        let fixture = "```rust\n    let x = 1;\n```";
        let parser = CodeBlockParser::new(fixture);

        let actual = parser.blocks()[0].code.as_str();
        let expected = "    let x = 1;";

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_detects_indented_code_fence() {
        let fixture = "1. Item\n\n   ```rust\n   code\n   ```";
        let parser = CodeBlockParser::new(fixture);

        let actual = parser.blocks().len();
        let expected = 1;

        assert_eq!(actual, expected);
        assert_eq!(parser.blocks()[0].lang, "rust");
    }

    #[test]
    fn test_multiple_languages() {
        let fixture = "```rust\nrust code\n```\n\n```python\npython code\n```";
        let parser = CodeBlockParser::new(fixture);

        let actual = parser.blocks().len();
        let expected = 2;

        assert_eq!(actual, expected);
        assert_eq!(parser.blocks()[0].lang, "rust");
        assert_eq!(parser.blocks()[1].lang, "python");
        // Each block gets its own placeholder, numbered in document order.
        assert!(parser.markdown().contains("\x000\x00"));
        assert!(parser.markdown().contains("\x001\x00"));
    }

    #[test]
    fn test_extracts_indented_code_blocks_from_fixture() {
        let parser = fixture_parser("code-01");

        let actual = parser.blocks().len();
        let expected = 4;

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_extracts_standard_code_blocks_from_fixture() {
        let parser = fixture_parser("code-02");

        let actual = parser.blocks().len();
        let expected = 3;

        assert_eq!(actual, expected);
    }

    #[test]
    fn test_restore_replaces_placeholders_with_highlighted_code() {
        let fixture = "```rust\ncode\n```";
        let highlighter = SyntaxHighlighter::default();
        let parser = CodeBlockParser::new(fixture);

        let actual = strip_ansi(&parser.restore(&highlighter, parser.markdown().to_string()));

        assert!(actual.contains("code"));
    }

    #[test]
    fn test_full_extraction_and_restoration_flow() {
        let fixture = "Hi\n```rust\nlet x = 1;\n```\nBye";
        let highlighter = SyntaxHighlighter::default();
        let parser = CodeBlockParser::new(fixture);

        // Surrounding text stays in the markdown; the code moves to a block.
        assert!(parser.markdown().contains("Hi"));
        assert!(parser.markdown().contains("Bye"));
        assert!(!parser.markdown().contains("let x = 1"));

        let actual = strip_ansi(&parser.restore(&highlighter, parser.markdown().to_string()));

        assert!(actual.contains("Hi"));
        assert!(actual.contains("let x = 1"));
        assert!(actual.contains("Bye"));
    }

    #[test]
    fn test_highlighter_can_be_reused() {
        let highlighter = SyntaxHighlighter::default();

        let parser1 = CodeBlockParser::new("```rust\nlet x = 1;\n```");
        let parser2 = CodeBlockParser::new("```python\nprint('hello')\n```");

        let actual1 = strip_ansi(&parser1.restore(&highlighter, parser1.markdown().to_string()));
        let actual2 = strip_ansi(&parser2.restore(&highlighter, parser2.markdown().to_string()));

        assert!(actual1.contains("let x = 1"));
        assert!(actual2.contains("print('hello')"));
    }
}
0 commit comments