diff --git a/readme.md b/readme.md index 373a1dd..bae4add 100644 --- a/readme.md +++ b/readme.md @@ -16,6 +16,7 @@ A blazingly fast tool for peeking at codebases. Perfect for loading your codebas - 🔗 Web content processing with Markdown conversion - 📦 Git repository support - 🌐 URL traversal with configurable depth +- 🏷️ XML output format for better LLM compatibility ## Installation @@ -115,6 +116,9 @@ glimpse --config_path # Initialize a .glimpse config file in the current directory glimpse --config + +# Output in XML format for better LLM compatibility +glimpse -x /path/to/project ``` ## CLI Options @@ -146,6 +150,7 @@ Options: --traverse-links Traverse links when processing URLs --link-depth Maximum depth to traverse links (default: 1) --pdf Save output as PDF + -x, --xml Output in XML format for better LLM compatibility -h, --help Print help -V, --version Print version ``` @@ -179,6 +184,60 @@ default_excludes = [ ] ``` +## XML Output Format + +Glimpse supports XML output format designed for better compatibility with Large Language Models (LLMs) like Claude, GPT, and others. When using the `-x` or `--xml` flag, the output is structured with clear XML tags that help LLMs better understand the context and structure of your codebase. 
+ +### XML Structure + +The XML output wraps all content in a `<context>` tag with the project name: + +```xml +<context name="my_project"> +<tree> +└── src/ + └── main.rs +</tree> + +<files> +<file path="src/main.rs"> +================================================ +fn main() { + println!("Hello, World!"); +} +</file> +</files> + +<summary> +Total files: 1 +Total size: 45 bytes +</summary> +</context> +``` + +### Benefits for LLM Usage + +- **Clear Context Boundaries**: The `<context>` wrapper helps LLMs understand where your codebase begins and ends +- **Structured Information**: Separate sections for directory tree, file contents, and summary +- **Proper Escaping**: XML-safe content that won't confuse parsers +- **Project Identification**: Automatic project name detection for better context + +### Usage Examples + +```bash +# Basic XML output +glimpse -x /path/to/project + +# XML output with file save +glimpse -x -f project.xml /path/to/project + +# XML output to stdout +glimpse -x --print /path/to/project + +# XML output with specific includes +glimpse -x -i "*.rs,*.py" /path/to/project +``` + ## Token Counting Glimpse supports two tokenizer backends: diff --git a/src/analyzer.rs b/src/analyzer.rs index 9662870..04704e5 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -41,8 +41,15 @@ pub fn process_directory(args: &Cli) -> Result<()> { fs::write(pdf_path, pdf_data)?; println!("PDF output written to: {}", pdf_path.display()); } else { + // Determine project name for XML output + let project_name = if args.xml { + Some(determine_project_name(&args.paths)) + } else { + None + }; + // Handle output (print/copy/save) - let output = generate_output(&entries, output_format)?; + let output = generate_output(&entries, output_format, args.xml, project_name)?; handle_output(output, args)?; } @@ -54,6 +61,33 @@ pub fn process_directory(args: &Cli) -> Result<()> { Ok(()) } +fn determine_project_name(paths: &[String]) -> String { + if let Some(first_path) = paths.first() { + let path = std::path::Path::new(first_path); + + // If it's a directory, use its name + if path.is_dir() { + if let Some(name) 
= path.file_name() { + return name.to_string_lossy().to_string(); + } + } + + // If it's a file, use the parent directory name + if path.is_file() { + if let Some(parent) = path.parent() { + if let Some(name) = parent.file_name() { + return name.to_string_lossy().to_string(); + } + } + } + + // Fallback to just the path itself + first_path.clone() + } else { + "project".to_string() + } +} + pub fn process_entries(args: &Cli) -> Result> { let max_size = args.max_size.expect("max_size should be set from config"); let max_depth = args.max_depth.expect("max_depth should be set from config"); @@ -361,6 +395,7 @@ mod tests { pdf: None, traverse_links: false, link_depth: None, + xml: false, } } diff --git a/src/cli.rs b/src/cli.rs index 69c216e..5d846fe 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -114,6 +114,10 @@ pub struct Cli { /// Maximum depth to traverse sublinks (default: 1) #[arg(long)] pub link_depth: Option, + + /// Output in XML format for better LLM compatibility + #[arg(short = 'x', long)] + pub xml: bool, } impl Cli { diff --git a/src/output.rs b/src/output.rs index 7e6fa34..3a12deb 100644 --- a/src/output.rs +++ b/src/output.rs @@ -15,37 +15,97 @@ pub struct FileEntry { pub size: u64, } -pub fn generate_output(entries: &[FileEntry], format: OutputFormat) -> Result { +pub fn generate_output( + entries: &[FileEntry], + format: OutputFormat, + xml_format: bool, + project_name: Option, +) -> Result { let mut output = String::new(); + if xml_format { + let project_name = project_name.unwrap_or_else(|| "project".to_string()); + output.push_str(&format!( + "\n", + xml_escape(&project_name) + )); + } + match format { OutputFormat::Tree => { - output.push_str("Directory Structure:\n"); + if xml_format { + output.push_str("\n"); + } else { + output.push_str("Directory Structure:\n"); + } output.push_str(&generate_tree(entries)?); + if xml_format { + output.push_str("\n"); + } } OutputFormat::Files => { - output.push_str("File Contents:\n"); - 
output.push_str(&generate_files(entries)?); + if xml_format { + output.push_str("\n"); + } else { + output.push_str("File Contents:\n"); + } + output.push_str(&generate_files(entries, xml_format)?); + if xml_format { + output.push_str("\n"); + } } OutputFormat::Both => { - output.push_str("Directory Structure:\n"); + if xml_format { + output.push_str("\n"); + } else { + output.push_str("Directory Structure:\n"); + } output.push_str(&generate_tree(entries)?); - output.push_str("\nFile Contents:\n"); - output.push_str(&generate_files(entries)?); + if xml_format { + output.push_str("\n\n\n"); + } else { + output.push_str("\nFile Contents:\n"); + } + output.push_str(&generate_files(entries, xml_format)?); + if xml_format { + output.push_str("\n"); + } } } // Add summary - output.push_str("\nSummary:\n"); - output.push_str(&format!("Total files: {}\n", entries.len())); - output.push_str(&format!( - "Total size: {} bytes\n", - entries.iter().map(|e| e.size).sum::() - )); + if xml_format { + output.push_str("\n"); + output.push_str(&format!("Total files: {}\n", entries.len())); + output.push_str(&format!( + "Total size: {} bytes\n", + entries.iter().map(|e| e.size).sum::() + )); + output.push_str("\n"); + } else { + output.push_str("\nSummary:\n"); + output.push_str(&format!("Total files: {}\n", entries.len())); + output.push_str(&format!( + "Total size: {} bytes\n", + entries.iter().map(|e| e.size).sum::() + )); + } + + if xml_format { + output.push_str(""); + } Ok(output) } +fn xml_escape(text: &str) -> String { + text.replace('&', "&") + .replace('<', "<") + .replace('>', ">") + .replace('"', """) + .replace('\'', "'") +} + pub fn display_token_counts(token_counter: TokenCounter, entries: &[FileEntry]) -> Result<()> { let token_count = token_counter.count_files(entries)?; @@ -117,15 +177,27 @@ fn generate_tree(entries: &[FileEntry]) -> Result { Ok(output) } -fn generate_files(entries: &[FileEntry]) -> Result { +fn generate_files(entries: &[FileEntry], xml_format: bool) 
-> Result { let mut output = String::new(); for entry in entries { - output.push_str(&format!("\nFile: {}\n", entry.path.display())); - output.push_str(&"=".repeat(48)); - output.push('\n'); - output.push_str(&entry.content); - output.push('\n'); + if xml_format { + output.push_str(&format!( + "\n", + xml_escape(entry.path.display().to_string().as_str()) + )); + output.push_str(&"=".repeat(48)); + output.push('\n'); + output.push_str(&entry.content); + output.push('\n'); + output.push_str("\n"); + } else { + output.push_str(&format!("\nFile: {}\n", entry.path.display())); + output.push_str(&"=".repeat(48)); + output.push('\n'); + output.push_str(&entry.content); + output.push('\n'); + } } Ok(output) @@ -280,7 +352,7 @@ mod tests { #[test] fn test_files_output() { let entries = create_test_entries(); - let files = generate_files(&entries).unwrap(); + let files = generate_files(&entries, false).unwrap(); let expected = format!( "\nFile: {}\n{}\n{}\n\nFile: {}\n{}\n{}\n", "src/main.rs", @@ -298,23 +370,73 @@ mod tests { let entries = create_test_entries(); // Test tree format - let tree_output = generate_output(&entries, OutputFormat::Tree).unwrap(); + let tree_output = generate_output(&entries, OutputFormat::Tree, false, None).unwrap(); assert!(tree_output.contains("Directory Structure:")); assert!(tree_output.contains("src/")); assert!(tree_output.contains("main.rs")); // Test files format - let files_output = generate_output(&entries, OutputFormat::Files).unwrap(); + let files_output = generate_output(&entries, OutputFormat::Files, false, None).unwrap(); assert!(files_output.contains("File Contents:")); assert!(files_output.contains("fn main()")); assert!(files_output.contains("pub fn helper()")); // Test both format - let both_output = generate_output(&entries, OutputFormat::Both).unwrap(); + let both_output = generate_output(&entries, OutputFormat::Both, false, None).unwrap(); assert!(both_output.contains("Directory Structure:")); 
assert!(both_output.contains("File Contents:")); } + #[test] + fn test_xml_output() { + let entries = create_test_entries(); + + // Test XML tree format + let xml_tree_output = generate_output( + &entries, + OutputFormat::Tree, + true, + Some("test_project".to_string()), + ) + .unwrap(); + assert!(xml_tree_output.contains("")); + assert!(xml_tree_output.contains("")); + assert!(xml_tree_output.contains("")); + assert!(xml_tree_output.contains("")); + assert!(xml_tree_output.contains("")); + assert!(xml_tree_output.contains("")); + + // Test XML files format + let xml_files_output = generate_output( + &entries, + OutputFormat::Files, + true, + Some("test_project".to_string()), + ) + .unwrap(); + assert!(xml_files_output.contains("")); + assert!(xml_files_output.contains("")); + assert!(xml_files_output.contains("")); + assert!(xml_files_output.contains("")); + assert!(xml_files_output.contains("")); + assert!(xml_files_output.contains("")); + + // Test XML both format + let xml_both_output = generate_output( + &entries, + OutputFormat::Both, + true, + Some("test_project".to_string()), + ) + .unwrap(); + assert!(xml_both_output.contains("")); + assert!(xml_both_output.contains("")); + assert!(xml_both_output.contains("")); + assert!(xml_both_output.contains("")); + assert!(xml_both_output.contains("")); + assert!(xml_both_output.contains("")); + } + #[test] fn test_handle_output() { use tempfile::tempdir; @@ -345,6 +467,7 @@ mod tests { traverse_links: false, link_depth: None, config_path: false, + xml: false, }; handle_output(content.clone(), &args).unwrap(); diff --git a/test_project/src/lib.rs b/test_project/src/lib.rs new file mode 100644 index 0000000..a61949e --- /dev/null +++ b/test_project/src/lib.rs @@ -0,0 +1 @@ +pub fn helper() { println!("Helper function"); } diff --git a/test_project/src/main.rs b/test_project/src/main.rs new file mode 100644 index 0000000..a3e978a --- /dev/null +++ b/test_project/src/main.rs @@ -0,0 +1 @@ +fn main() { println!("Hello, 
world!"); }