diff --git a/src/indexer/graphrag/database.rs b/src/indexer/graphrag/database.rs
index 4340fb0..111ec73 100644
--- a/src/indexer/graphrag/database.rs
+++ b/src/indexer/graphrag/database.rs
@@ -192,23 +192,38 @@ impl<'a> DatabaseOperations<'a> {
 			let desc_array = extract_string_column(&rel_batch, "description")?;
 			let conf_array = extract_f32_column(&rel_batch, "confidence")?;
 
-			// Process each relationship
+			// Deduplicate relationships by (source, target, type) triple —
+			// batch writes can produce duplicates across incremental flushes
+			let mut seen = std::collections::HashSet::new();
+			let mut dedup_count = 0usize;
 			for i in 0..rel_batch.num_rows() {
+				let source = source_array.value(i);
+				let target = target_array.value(i);
+				let rel_type = type_array.value(i);
+				let key = (source.to_string(), target.to_string(), rel_type.to_string());
+				if !seen.insert(key) {
+					dedup_count += 1;
+					continue;
+				}
 				let relationship = CodeRelationship {
-					source: source_array.value(i).to_string(),
-					target: target_array.value(i).to_string(),
-					relation_type: type_array
-						.value(i)
+					source: source.to_string(),
+					target: target.to_string(),
+					relation_type: rel_type
 						.parse()
 						.unwrap_or(crate::indexer::graphrag::types::RelationType::Imports),
 					description: desc_array.value(i).to_string(),
 					confidence: conf_array.value(i),
-					weight: 1.0, // Default weight for legacy relationships
+					weight: 1.0,
 				};
-
-				// Add to graph
 				graph.relationships.push(relationship);
 			}
+			if dedup_count > 0 && !quiet {
+				println!(
+					"  Deduplicated {} → {} unique relationships",
+					rel_batch.num_rows(),
+					graph.relationships.len()
+				);
+			}
 		}
 
 		if !graph.nodes.is_empty() && !quiet {
diff --git a/src/indexer/graphrag/relationships.rs b/src/indexer/graphrag/relationships.rs
index 3bad307..5743513 100644
--- a/src/indexer/graphrag/relationships.rs
+++ b/src/indexer/graphrag/relationships.rs
@@ -32,30 +32,36 @@ impl RelationshipDiscovery {
 
 		for source_file in new_files {
 			// 1. Import/Export relationships (high confidence)
-			for import in &source_file.imports {
-				for target_file in all_nodes {
-					if target_file.id == source_file.id {
-						continue;
-					}
+			// For markdown files, use proper path-based import resolution
+			// (the symbol-matching approach doesn't work for file path imports)
+			if source_file.language == "markdown" {
+	Self::discover_import_relationships(source_file, all_nodes, &mut relationships);
+			} else {
+				for import in &source_file.imports {
+					for target_file in all_nodes {
+						if target_file.id == source_file.id {
+							continue;
+						}
 
-					// Check if target exports what source imports
-					if target_file
-						.exports
-						.iter()
-						.any(|exp| symbols_match(import, exp))
-						|| target_file
-							.symbols
+						// Check if target exports what source imports
+						if target_file
+							.exports
 							.iter()
-							.any(|sym| symbols_match(import, sym))
-					{
-						relationships.push(CodeRelationship {
-							source: source_file.id.clone(),
-							target: target_file.id.clone(),
-							relation_type: crate::indexer::graphrag::types::RelationType::Imports,
-							description: format!("Imports {} from {}", import, target_file.name),
-							confidence: 0.9,
-							weight: 1.0,
-						});
+							.any(|exp| symbols_match(import, exp))
+							|| target_file
+								.symbols
+								.iter()
+								.any(|sym| symbols_match(import, sym))
+						{
+							relationships.push(CodeRelationship {
+								source: source_file.id.clone(),
+								target: target_file.id.clone(),
+								relation_type: crate::indexer::graphrag::types::RelationType::Imports,
+								description: format!("Imports {} from {}", import, target_file.name),
+								confidence: 0.9,
+								weight: 1.0,
+							});
+						}
 					}
 				}
 			}
@@ -193,16 +199,26 @@ impl RelationshipDiscovery {
 				{
 					// Find the target node
 					if let Some(target_node) = file_map.get(&resolved_path) {
-						// Create semantic import relationship
+						// Use References for markdown cross-links, Imports for code
+						let rel_type = if source_file.language == "markdown" {
+							crate::indexer::graphrag::types::RelationType::References
+						} else {
+							crate::indexer::graphrag::types::RelationType::Imports
+						};
+						let description_prefix = if source_file.language == "markdown" {
+							"References"
+						} else {
+							"Direct import"
+						};
 						relationships.push(CodeRelationship {
 							source: source_file.id.clone(),
 							target: target_node.id.clone(),
-							relation_type: crate::indexer::graphrag::types::RelationType::Imports,
+							relation_type: rel_type,
 							description: format!(
-								"Direct import: {} -> {}",
-								import_path, resolved_path
+								"{}: {} -> {}",
+								description_prefix, import_path, resolved_path
 							),
-							confidence: 0.95, // High confidence for resolved imports
+							confidence: 0.95,
 							weight: 1.0,
 						});
 
@@ -482,10 +498,9 @@ impl RelationshipDiscovery {
 			|| relative_path.contains(".test.")
 		{
 			"test_file".to_string()
-		} else if relative_path.ends_with(".md")
-			|| relative_path.ends_with(".txt")
-			|| relative_path.ends_with(".rst")
-		{
+		} else if relative_path.ends_with(".md") || relative_path.ends_with(".markdown") {
+			"document_file".to_string()
+		} else if relative_path.ends_with(".txt") || relative_path.ends_with(".rst") {
 			"documentation".to_string()
 		} else if relative_path.contains("/config") || relative_path.contains(".config") {
 			"config_file".to_string()
diff --git a/src/indexer/graphrag/tests.rs b/src/indexer/graphrag/tests.rs
index 891e022..182668b 100644
--- a/src/indexer/graphrag/tests.rs
+++ b/src/indexer/graphrag/tests.rs
@@ -954,4 +954,101 @@ def _private_function():
 			extract_imports_exports_recursive(child, contents, lang_impl, all_imports, all_exports);
 		}
 	}
+
+	/// Integration test: `discover_relationships_efficiently` must emit
+	/// `References` edges for a markdown node whose imports are relative file paths.
+	/// Exercises the runtime entry point instead of calling `discover_import_relationships` directly.
+	#[tokio::test]
+	async fn test_markdown_references_via_efficient_discovery() {
+		use crate::indexer::graphrag::types::RelationType;
+
+		// Source node: adapters-integrations.md links to credit-suite.md and credit-accounts.md
+		let source = CodeNode {
+			id: "projects/docs/core/adapters-integrations.md".to_string(),
+			name: "adapters-integrations".to_string(),
+			kind: "document_file".to_string(),
+			path: "projects/docs/core/adapters-integrations.md".to_string(),
+			description: String::new(),
+			symbols: vec![],
+			imports: vec![
+				"credit-suite.md".to_string(),             // same-dir
+				"../intro/credit-accounts.md".to_string(),  // parent-dir
+			],
+			exports: vec!["Adapters".to_string()],
+			functions: vec![],
+			hash: "aaa".to_string(),
+			embedding: vec![],
+			size_lines: 50,
+			language: "markdown".to_string(),
+		};
+
+		let target1 = CodeNode {
+			id: "projects/docs/core/credit-suite.md".to_string(),
+			name: "credit-suite".to_string(),
+			kind: "document_file".to_string(),
+			path: "projects/docs/core/credit-suite.md".to_string(),
+			description: String::new(),
+			symbols: vec![],
+			imports: vec![],
+			exports: vec!["Credit Suite".to_string()],
+			functions: vec![],
+			hash: "bbb".to_string(),
+			embedding: vec![],
+			size_lines: 100,
+			language: "markdown".to_string(),
+		};
+
+		let target2 = CodeNode {
+			id: "projects/docs/intro/credit-accounts.md".to_string(),
+			name: "credit-accounts".to_string(),
+			kind: "document_file".to_string(),
+			path: "projects/docs/intro/credit-accounts.md".to_string(),
+			description: String::new(),
+			symbols: vec![],
+			imports: vec![],
+			exports: vec!["Credit Accounts".to_string()],
+			functions: vec![],
+			hash: "ccc".to_string(),
+			embedding: vec![],
+			size_lines: 80,
+			language: "markdown".to_string(),
+		};
+
+		let all_nodes = vec![source.clone(), target1.clone(), target2.clone()];
+		let new_files = vec![source.clone()];
+
+		// Call the SAME function used at runtime
+		let relationships = RelationshipDiscovery::discover_relationships_efficiently(
+			&new_files,
+			&all_nodes,
+		)
+		.await
+		.expect("relationship discovery should succeed");
+
+		// Find References relationships (not just sibling_module)
+		let refs: Vec<_> = relationships
+			.iter()
+			.filter(|r| r.relation_type == RelationType::References)
+			.collect();
+
+		assert!(
+			refs.len() >= 2,
+			"Expected at least 2 References relationships, got {}: {:?}",
+			refs.len(),
+			refs.iter().map(|r| format!("{} -> {}", r.source, r.target)).collect::<Vec<_>>()
+		);
+
+		// Verify both expected edges: the same-directory link and the parent-directory link
+		let has_suite = refs.iter().any(|r| {
+			r.source == "projects/docs/core/adapters-integrations.md"
+				&& r.target == "projects/docs/core/credit-suite.md"
+		});
+		assert!(has_suite, "Should have reference to credit-suite.md");
+
+		let has_accounts = refs.iter().any(|r| {
+			r.source == "projects/docs/core/adapters-integrations.md"
+				&& r.target == "projects/docs/intro/credit-accounts.md"
+		});
+		assert!(has_accounts, "Should have reference to credit-accounts.md");
+	}
 }
diff --git a/src/indexer/graphrag/types.rs b/src/indexer/graphrag/types.rs
index 9fd8799..298d06a 100644
--- a/src/indexer/graphrag/types.rs
+++ b/src/indexer/graphrag/types.rs
@@ -60,6 +60,8 @@ pub enum RelationType {
 	StrategyPattern,
 	/// Adapter pattern (interface adaptation)
 	AdapterPattern,
+	/// Document cross-reference via markdown link
+	References,
 
 	// Low importance - Organizational relationships (weight: 0.3)
 	/// Files in the same directory
@@ -85,6 +87,7 @@ impl RelationType {
 			| Self::ObserverPattern
 			| Self::StrategyPattern
 			| Self::AdapterPattern => 0.8,
+			Self::References => 0.6,
 
 			// Low importance - organizational structure
 			Self::SiblingModule | Self::ParentModule | Self::ChildModule => 0.3,
@@ -105,6 +108,7 @@ impl RelationType {
 			Self::ObserverPattern => "observer_pattern",
 			Self::StrategyPattern => "strategy_pattern",
 			Self::AdapterPattern => "adapter_pattern",
+			Self::References => "references",
 			Self::SiblingModule => "sibling_module",
 			Self::ParentModule => "parent_module",
 			Self::ChildModule => "child_module",
@@ -129,6 +133,7 @@ impl FromStr for RelationType {
 			"observer_pattern" => Self::ObserverPattern,
 			"strategy_pattern" => Self::StrategyPattern,
 			"adapter_pattern" => Self::AdapterPattern,
+			"references" => Self::References,
 			"sibling_module" => Self::SiblingModule,
 			"parent_module" => Self::ParentModule,
 			"child_module" => Self::ChildModule,
@@ -264,6 +269,9 @@ mod tests {
 		assert_eq!(RelationType::Uses.importance_weight(), 0.7);
 		assert_eq!(RelationType::FactoryCreates.importance_weight(), 0.8);
 
+		// Document references (between structural and organizational)
+		assert_eq!(RelationType::References.importance_weight(), 0.6);
+
 		// Low importance relationships
 		assert_eq!(RelationType::SiblingModule.importance_weight(), 0.3);
 		assert_eq!(RelationType::ParentModule.importance_weight(), 0.3);
@@ -290,6 +298,16 @@ mod tests {
 			RelationType::Calls.importance_weight()
 				> RelationType::ParentModule.importance_weight()
 		);
+
+		// Verify references sit between structural imports and organizational
+		assert!(
+			RelationType::Imports.importance_weight()
+				> RelationType::References.importance_weight()
+		);
+		assert!(
+			RelationType::References.importance_weight()
+				> RelationType::SiblingModule.importance_weight()
+		);
 	}
 
 	#[test]
@@ -315,6 +333,10 @@ mod tests {
 			"sibling_module".parse::<RelationType>().unwrap(),
 			RelationType::SiblingModule
 		);
+		assert_eq!(
+			"references".parse::<RelationType>().unwrap(),
+			RelationType::References
+		);
 
 		// Test unknown type defaults to Imports
 		assert_eq!(
@@ -343,6 +365,7 @@ mod tests {
 			RelationType::Imports,
 			RelationType::Calls,
 			RelationType::Uses,
+			RelationType::References,
 			RelationType::SiblingModule,
 		];
 
diff --git a/src/indexer/languages/markdown.rs b/src/indexer/languages/markdown.rs
index 8a100f5..c8e3b5c 100644
--- a/src/indexer/languages/markdown.rs
+++ b/src/indexer/languages/markdown.rs
@@ -65,23 +65,72 @@ impl Language for Markdown {
 		"markdown headings"
 	}
 
-	// Markdown doesn't have traditional imports/exports
-	#[allow(dead_code)]
-	fn extract_imports_exports(&self, _node: Node, _contents: &str) -> (Vec<String>, Vec<String>) {
-		// Markdown files don't have imports or exports in the traditional sense
-		// Could potentially extract links to other markdown files, but that's not
-		// the same as code imports/exports
-		(Vec::new(), Vec::new())
+	/// Returns `(imports, exports)`: imports are the relative `.md` link targets,
+	/// exports are the section headings produced by `extract_symbols`.
+	fn extract_imports_exports(&self, node: Node, contents: &str) -> (Vec<String>, Vec<String>) {
+		// Matches `[text](path.md)` with an optional `#anchor`; group 1 is the
+		// path without the anchor. Compiled once instead of once per file.
+		static LINK_RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
+		// Only extract at the root node to avoid redundant passes during
+		// the recursive AST walk (the JSON placeholder parser produces
+		// multiple nodes, each receiving the same contents string).
+		if node.parent().is_some() {
+			return (Vec::new(), Vec::new());
+		}
+
+		let link_re = LINK_RE.get_or_init(|| {
+			regex::Regex::new(r"\[[^\]]*\]\(([^)]+\.md)(?:#[^)]*)?\)")
+				.expect("markdown link pattern is a valid regex")
+		});
+		let mut links: Vec<String> = link_re
+			.captures_iter(contents)
+			.filter_map(|cap| cap.get(1))
+			.map(|target| target.as_str())
+			// Skip external URLs (http:// or https://)
+			.filter(|path| !path.starts_with("http://") && !path.starts_with("https://"))
+			.map(str::to_string)
+			.collect();
+		// Deduplicate — a doc may link to the same target multiple times
+		links.sort_unstable();
+		links.dedup();
+
+		(links, self.extract_symbols(node, contents))
+	}
 
 	fn resolve_import(
 		&self,
-		_import_path: &str,
-		_source_file: &str,
-		_all_files: &[String],
+		import_path: &str,
+		source_file: &str,
+		all_files: &[String],
 	) -> Option<String> {
-		// Markdown doesn't have imports
-		None
+		use std::path::{Component, PathBuf};
+
+		let source_dir = PathBuf::from(source_file)
+			.parent()
+			.map(|p| p.to_path_buf())
+			.unwrap_or_default();
+		let joined = source_dir.join(import_path);
+
+		// Normalize path components (resolve ../ and ./)
+		let normalized =
+			joined
+				.components()
+				.fold(PathBuf::new(), |mut acc, c| {
+					match c {
+						Component::ParentDir => {
+							acc.pop();
+						}
+						Component::CurDir => {}
+						Component::Normal(os) => {
+							acc.push(os);
+						}
+						_ => {}
+					}
+					acc
+				});
+
+		let normalized_str = normalized.to_string_lossy().to_string();
+		all_files.iter().find(|f| **f == normalized_str).cloned()
 	}
 
 	fn get_file_extensions(&self) -> Vec<&'static str> {
@@ -90,3 +139,123 @@ impl Language for Markdown {
 }
 
 impl Markdown {}
+
+#[cfg(test)]
+mod tests {
+	use super::*;
+
+	#[test]
+	fn test_extract_markdown_links() {
+		let content = r#"
+# Credit Accounts
+
+See [Credit Suite](../core-architecture/credit-suite.md) for details.
+Also check [Pool](../core-architecture/pool.md#liquidity) and
+[Adapters](./adapters.md).
+
+External links are ignored: [Docs](https://docs.example.com/guide.md)
+Non-md links are ignored: [Image](./photo.png)
+"#;
+
+		// Exercises a COPY of the pattern in extract_imports_exports (a tree-sitter Node is hard to build here) — keep both in sync
+		let link_re =
+			regex::Regex::new(r"\[[^\]]*\]\(([^)]+\.md)(?:#[^)]*)?\)").unwrap();
+		let mut links = Vec::new();
+		for cap in link_re.captures_iter(content) {
+			if let Some(target) = cap.get(1) {
+				let path = target.as_str();
+				if !path.starts_with("http://") && !path.starts_with("https://") {
+					links.push(path.to_string());
+				}
+			}
+		}
+
+		assert_eq!(links.len(), 3);
+		assert!(links.contains(&"../core-architecture/credit-suite.md".to_string()));
+		assert!(links.contains(&"../core-architecture/pool.md".to_string()));
+		assert!(links.contains(&"./adapters.md".to_string()));
+		assert!(!links.iter().any(|l| l.contains("https://")));
+		assert!(!links.iter().any(|l| l.contains(".png")));
+	}
+
+	#[test]
+	fn test_resolve_markdown_import() {
+		let md = Markdown;
+		let all_files = vec![
+			"core-architecture/credit-suite.md".to_string(),
+			"core-architecture/pool.md".to_string(),
+			"introduction/adapters.md".to_string(),
+		];
+
+		// Parent-directory link: introduction/credit-accounts.md → ../core-architecture/credit-suite.md
+		let resolved = md.resolve_import(
+			"../core-architecture/credit-suite.md",
+			"introduction/credit-accounts.md",
+			&all_files,
+		);
+		assert_eq!(
+			resolved,
+			Some("core-architecture/credit-suite.md".to_string())
+		);
+
+		// Same-directory link written with an explicit ./ prefix
+		let resolved = md.resolve_import(
+			"./adapters.md",
+			"introduction/credit-accounts.md",
+			&all_files,
+		);
+		assert_eq!(resolved, Some("introduction/adapters.md".to_string()));
+
+		// Non-existent target: normalizes to "nonexistent.md", which is not in all_files
+		let resolved = md.resolve_import(
+			"../nonexistent.md",
+			"introduction/credit-accounts.md",
+			&all_files,
+		);
+		assert_eq!(resolved, None);
+	}
+
+	#[test]
+	fn test_resolve_with_deep_project_paths() {
+		let md = Markdown;
+		// Real-world paths from ai-assistant repo
+		let all_files = vec![
+			"projects/gearbox/autodocs-about/docs/core-architecture/credit-suite.md".to_string(),
+			"projects/gearbox/autodocs-about/docs/core-architecture/pool.md".to_string(),
+			"projects/gearbox/autodocs-about/docs/introduction/credit-accounts.md".to_string(),
+		];
+
+		// adapters-integrations.md links to credit-suite.md (same dir, no ./ prefix)
+		let resolved = md.resolve_import(
+			"credit-suite.md",
+			"projects/gearbox/autodocs-about/docs/core-architecture/adapters-integrations.md",
+			&all_files,
+		);
+		assert_eq!(
+			resolved,
+			Some("projects/gearbox/autodocs-about/docs/core-architecture/credit-suite.md".to_string()),
+			"Same-dir link should resolve"
+		);
+
+		// adapters-integrations.md links to ../introduction/credit-accounts.md (sibling directory)
+		let resolved = md.resolve_import(
+			"../introduction/credit-accounts.md",
+			"projects/gearbox/autodocs-about/docs/core-architecture/adapters-integrations.md",
+			&all_files,
+		);
+		assert_eq!(
+			resolved,
+			Some("projects/gearbox/autodocs-about/docs/introduction/credit-accounts.md".to_string()),
+			"Parent-dir link should resolve"
+		);
+	}
+
+	#[test]
+	fn test_no_links_in_empty_doc() {
+		let link_re =
+			regex::Regex::new(r"\[[^\]]*\]\(([^)]+\.md)(?:#[^)]*)?\)").unwrap();
+		let content = "# Simple heading\nNo links here.";
+		let links: Vec<_> = link_re.captures_iter(content).collect();
+		assert!(links.is_empty());
+	}
+}
diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs
index 507a309..488837a 100644
--- a/src/indexer/mod.rs
+++ b/src/indexer/mod.rs
@@ -42,6 +42,7 @@ use crate::config::Config;
 use crate::mcp::logging::{log_file_processing_error, log_indexing_progress};
 use crate::state;
 use crate::state::SharedState;
+use crate::store::CodeBlock;
 #[cfg(test)]
 use crate::store::DocumentBlock;
 use crate::store::Store;
@@ -848,6 +849,24 @@ pub async fn index_files_with_quiet(
 								state.clone(),
 							)
 							.await?;
+
+							// Also create a synthetic CodeBlock so GraphBuilder includes
+							// this file in the knowledge graph. The builder reads the file
+							// from disk and calls Markdown::extract_imports_exports for
+							// actual link extraction.
+							if config.graphrag.enabled {
+								all_code_blocks.push(CodeBlock {
+									path: file_path.to_string(),
+									language: "markdown".to_string(),
+									content: String::new(),
+									symbols: vec![],
+									start_line: 0,
+									end_line: contents.lines().count(),
+									hash: crate::embedding::calculate_content_hash(&contents),
+									distance: None,
+								});
+							}
+
 							file_processed = true;
 						} else {
 							// Handle code files - index as semantic code blocks only
@@ -1066,6 +1085,21 @@ pub async fn index_files_with_quiet(
 								state.clone(),
 							)
 							.await?;
+
+							// Also create a synthetic CodeBlock for GraphRAG
+							if config.graphrag.enabled {
+								all_code_blocks.push(CodeBlock {
+									path: file_path.to_string(),
+									language: "markdown".to_string(),
+									content: String::new(),
+									symbols: vec![],
+									start_line: 0,
+									end_line: contents.lines().count(),
+									hash: crate::embedding::calculate_content_hash(&contents),
+									distance: None,
+								});
+							}
+
 							file_processed = true;
 						} else {
 							// Handle code files - index as semantic code blocks only
@@ -1423,6 +1457,26 @@ pub async fn handle_file_change(store: &Store, file_path: &str, config: &Config)
 						)
 						.await?;
 					}
+
+					// Update GraphRAG for this markdown file
+					if config.graphrag.enabled {
+						let md_blocks = vec![CodeBlock {
+							path: relative_file_path.to_string(),
+							language: "markdown".to_string(),
+							content: String::new(),
+							symbols: vec![],
+							start_line: 0,
+							end_line: contents.lines().count(),
+							hash: crate::embedding::calculate_content_hash(&contents),
+							distance: None,
+						}];
+						let graph_builder =
+							graphrag::GraphBuilder::new_with_quiet(config.clone(), true)
+								.await?;
+						graph_builder
+							.process_code_blocks(&md_blocks, Some(state.clone()))
+							.await?;
+					}
 				} else {
 					// Handle code files
 					let mut code_blocks_batch = Vec::new();
diff --git a/src/store/graphrag.rs b/src/store/graphrag.rs
index 1d06c54..5e4ddb7 100644
--- a/src/store/graphrag.rs
+++ b/src/store/graphrag.rs
@@ -444,32 +444,74 @@ impl<'a> GraphRagOperations<'a> {
 	pub async fn get_all_code_blocks_for_graphrag(&self) -> Result<Vec<CodeBlock>> {
 		let mut all_blocks = Vec::new();
 
-		if !self.table_ops.table_exists("code_blocks").await? {
-			return Ok(all_blocks);
+		if self.table_ops.table_exists("code_blocks").await? {
+			let table = self.get_table("code_blocks").await?;
+
+			// Get all code blocks in batches to avoid memory issues
+			let mut results = table.query().execute().await?;
+
+			// Process all result batches
+			while let Some(batch) = results.try_next().await? {
+				if batch.num_rows() > 0 {
+					// Convert batch to CodeBlocks
+					let converter =
+						crate::store::batch_converter::BatchConverter::new(self.code_vector_dim);
+					let mut code_blocks = converter.batch_to_code_blocks(&batch, None)?;
+					all_blocks.append(&mut code_blocks);
+
+					// Log progress for large datasets
+					if cfg!(debug_assertions) && all_blocks.len() % 1000 == 0 {
+						tracing::debug!(
+							"Loaded {} code blocks for GraphRAG processing...",
+							all_blocks.len()
+						);
+					}
+				}
+			}
 		}
 
-		let table = self.get_table("code_blocks").await?;
+		// Also include markdown files from document_blocks so they get GraphRAG
+		// nodes and cross-reference relationships. Document blocks are stored
+		// separately from code blocks, so without this, markdown files would be
+		// invisible to GraphRAG when rebuilding from existing database.
+		if self.table_ops.table_exists("document_blocks").await? {
+			let doc_table = self.get_table("document_blocks").await?;
+			let mut doc_results = doc_table.query().execute().await?;
 
-		// Get all code blocks in batches to avoid memory issues
-		let mut results = table.query().execute().await?;
+			// Paths already covered by code blocks must not get a synthetic block
+			let mut seen_md_paths: std::collections::HashSet<String> =
+				all_blocks.iter().map(|block| block.path.clone()).collect();
+			// Remembered so the log below reports only the blocks added here
+			let code_block_count = all_blocks.len();
 
-		// Process all result batches
-		while let Some(batch) = results.try_next().await? {
-			if batch.num_rows() > 0 {
-				// Convert batch to CodeBlocks
-				let converter =
-					crate::store::batch_converter::BatchConverter::new(self.code_vector_dim);
-				let mut code_blocks = converter.batch_to_code_blocks(&batch, None)?;
-				all_blocks.append(&mut code_blocks);
-
-				// Log progress for large datasets
-				if cfg!(debug_assertions) && all_blocks.len() % 1000 == 0 {
-					tracing::debug!(
-						"Loaded {} code blocks for GraphRAG processing...",
-						all_blocks.len()
-					);
+			while let Some(batch) = doc_results.try_next().await? {
+				if batch.num_rows() > 0 {
+					let converter =
+						crate::store::batch_converter::BatchConverter::new(self.code_vector_dim);
+					let doc_blocks = converter.batch_to_document_blocks(&batch, None)?;
+					for doc in &doc_blocks {
+						if doc.path.ends_with(".md") && seen_md_paths.insert(doc.path.clone()) {
+							// Create a synthetic CodeBlock so GraphBuilder processes
+							// this markdown file for nodes and cross-references
+							all_blocks.push(crate::store::CodeBlock {
+								path: doc.path.clone(),
+								language: "markdown".to_string(),
+								content: String::new(),
+								symbols: vec![],
+								start_line: 0,
+								end_line: doc.end_line,
+								hash: doc.hash.clone(),
+								distance: None,
+							});
+						}
+					}
 				}
 			}
+
+			tracing::debug!(
+				"Added {} markdown files from document_blocks for GraphRAG",
+				all_blocks.len() - code_block_count
+			);
+		}
 
 		Ok(all_blocks)
@@ -919,9 +961,9 @@ impl<'a> GraphRagOperations<'a> {
 		} else if all_batches.len() == 1 {
 			Ok(all_batches.into_iter().next().unwrap())
 		} else {
-			// For simplicity, return the first batch
-			// In a production system, you might want to concatenate all batches
-			Ok(all_batches.into_iter().next().unwrap())
+			// Concatenate all batches into one
+			let schema = all_batches[0].schema();
+			Ok(arrow::compute::concat_batches(&schema, &all_batches)?)
 		}
 	}