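update: rename image keys `img_caption`/`img_footnote` to `image_caption`/`image_footnote` in docs; processor accepts both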

LarFii · LarFii · commit 214eb0f94d33 · 2025-09-22T11:11:40.000+08:00
diff --git a/README.md b/README.md
@@ -502,8 +502,8 @@ async def process_multimodal_content():
 
     image_content = {
         "img_path": "path/to/image.jpg",
-        "img_caption": ["Figure 1: Experimental results"],
-        "img_footnote": ["Data collected in 2024"]
+        "image_caption": ["Figure 1: Experimental results"],
+        "image_footnote": ["Data collected in 2024"]
     }
 
     description, entity_info = await image_processor.process_multimodal_content(
@@ -876,8 +876,8 @@ async def insert_content_list_example():
         {
             "type": "image",
             "img_path": "/absolute/path/to/figure1.jpg",  # IMPORTANT: Use absolute path
-            "img_caption": ["Figure 1: System Architecture"],
-            "img_footnote": ["Source: Authors' original design"],
+            "image_caption": ["Figure 1: System Architecture"],
+            "image_footnote": ["Source: Authors' original design"],
             "page_idx": 1  # Page number where this image appears
         },
         {
@@ -947,7 +947,7 @@ if __name__ == "__main__":
 The `content_list` should follow the standard format with each item being a dictionary containing:
 
 - **Text content**: `{"type": "text", "text": "content text", "page_idx": 0}`
-- **Image content**: `{"type": "image", "img_path": "/absolute/path/to/image.jpg", "img_caption": ["caption"], "img_footnote": ["note"], "page_idx": 1}`
+- **Image content**: `{"type": "image", "img_path": "/absolute/path/to/image.jpg", "image_caption": ["caption"], "image_footnote": ["note"], "page_idx": 1}`
 - **Table content**: `{"type": "table", "table_body": "markdown table", "table_caption": ["caption"], "table_footnote": ["note"], "page_idx": 2}`
 - **Equation content**: `{"type": "equation", "latex": "LaTeX formula", "text": "description", "page_idx": 3}`
 - **Generic content**: `{"type": "custom_type", "content": "any content", "page_idx": 4}`
diff --git a/README_zh.md b/README_zh.md
@@ -487,8 +487,8 @@ async def process_multimodal_content():
 
     image_content = {
         "img_path": "path/to/image.jpg",
-        "img_caption": ["图1：实验结果"],
-        "img_footnote": ["数据收集于2024年"]
+        "image_caption": ["图1：实验结果"],
+        "image_footnote": ["数据收集于2024年"]
     }
 
     description, entity_info = await image_processor.process_multimodal_content(
@@ -860,8 +860,8 @@ async def insert_content_list_example():
         {
             "type": "image",
             "img_path": "/absolute/path/to/figure1.jpg",  # 重要：使用绝对路径
-            "img_caption": ["图1：系统架构"],
-            "img_footnote": ["来源：作者原创设计"],
+            "image_caption": ["图1：系统架构"],
+            "image_footnote": ["来源：作者原创设计"],
             "page_idx": 1  # 此图像出现的页码
         },
         {
@@ -931,7 +931,7 @@ if __name__ == "__main__":
 `content_list` 应遵循标准格式，每个项目都是包含以下内容的字典：
 
 - **文本内容**: `{"type": "text", "text": "内容文本", "page_idx": 0}`
-- **图像内容**: `{"type": "image", "img_path": "/absolute/path/to/image.jpg", "img_caption": ["标题"], "img_footnote": ["注释"], "page_idx": 1}`
+- **图像内容**: `{"type": "image", "img_path": "/absolute/path/to/image.jpg", "image_caption": ["标题"], "image_footnote": ["注释"], "page_idx": 1}`
 - **表格内容**: `{"type": "table", "table_body": "markdown表格", "table_caption": ["标题"], "table_footnote": ["注释"], "page_idx": 2}`
 - **公式内容**: `{"type": "equation", "latex": "LaTeX公式", "text": "描述", "page_idx": 3}`
 - **通用内容**: `{"type": "custom_type", "content": "任何内容", "page_idx": 4}`
diff --git a/docs/context_aware_processing.md b/docs/context_aware_processing.md
@@ -202,7 +202,8 @@ Multimodal Content → Extract Surrounding Context → Enhanced LLM Analysis →
     {
         "type": "image",
         "img_path": "images/figure1.jpg",
-        "img_caption": ["Figure 1: Architecture"],
+        "image_caption": ["Figure 1: Architecture"],
+        "image_footnote": [],
         "page_idx": 1
     }
 ]
diff --git a/raganything/processor.py b/raganything/processor.py
@@ -925,8 +925,12 @@ def _apply_chunk_template(
         try:
             if content_type == "image":
                 image_path = original_item.get("img_path", "")
-                captions = original_item.get("img_caption", [])
-                footnotes = original_item.get("img_footnote", [])
+                captions = original_item.get(
+                    "image_caption", original_item.get("img_caption", [])
+                )
+                footnotes = original_item.get(
+                    "image_footnote", original_item.get("img_footnote", [])
+                )
 
                 return PROMPTS["image_chunk"].format(
                     image_path=image_path,
@@ -1733,7 +1737,7 @@ async def insert_content_list(
                          Each item should be a dictionary with the following structure:
                          - Text: {"type": "text", "text": "content", "page_idx": 0}
                          - Image: {"type": "image", "img_path": "/absolute/path/to/image.jpg",
-                                  "img_caption": ["caption"], "img_footnote": ["note"], "page_idx": 1}
+                                  "image_caption": ["caption"], "image_footnote": ["note"], "page_idx": 1}
                          - Table: {"type": "table", "table_body": "markdown table",
                                   "table_caption": ["caption"], "table_footnote": ["note"], "page_idx": 2}
                          - Equation: {"type": "equation", "latex": "LaTeX formula",

Original file line number	Diff line number	Diff line change
`@@ -202,7 +202,8 @@ Multimodal Content → Extract Surrounding Context → Enhanced LLM Analysis →`
`202`	`202`	`{`
`203`	`203`	`"type": "image",`
`204`	`204`	`"img_path": "images/figure1.jpg",`
`205`		`- "img_caption": ["Figure 1: Architecture"],`
	`205`	`+ "image_caption": ["Figure 1: Architecture"],`
	`206`	`+ "image_footnote": [],`
`206`	`207`	`"page_idx": 1`
`207`	`208`	`}`
`208`	`209`	`]`