Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 6b74997

Browse files
committed
feat(memory): Implement structured JSON extraction with retry logic and token optimization
Major improvements to fact extraction and storage system:

Extraction & Validation:
- Add Pydantic schema validation for structured JSON output
- Implement retry logic (3 attempts) with progressive prompt refinement
- Lower importance threshold 6→4 to capture more contextual facts
- Add user content check to skip extraction when only agent speaks
- Add context validation to reject bare facts without explanation

Storage Backend Fixes:
- Fix PostgreSQL JSON handling (dict vs string parsing)
- Fix MySQL SQL syntax (ON DUPLICATE KEY vs ON CONFLICT)
- Fix S3, Redis, MongoDB serialization (CallerMemory.model_dump())
- Add SSL parameter handling for MySQL and PostgreSQL connections
- Parse query params (ssl-mode, sslmode) without breaking SQLAlchemy

Fact Management:
- Implement history tracking with (PREVIOUS) and (CURRENT) markers
- Keep both old and new values when facts are updated
- Increase max_facts storage limit 15→50
- Optimize token usage: 150+ chars → 25-40 chars per fact value

JSON Repair:
- Add robust JSON repair logic for incomplete/malformed responses
- Handle unterminated strings and missing brackets
- Add regex fallback for extracting facts from partial JSON

Files changed:
- memory/extraction/llm_extractor.py (+483 lines)
- memory/extraction/prompts.py (+217 lines)
- memory/storage/*.py (all backends)
- memory/models.py, enrichment.py, service.py
1 parent f680628 commit 6b74997

12 files changed

Lines changed: 989 additions & 230 deletions

File tree

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ BRD.md
88
SRD.md
99
.env.local
1010
.env.*
11-
requirements.txt
11+
.opencode
12+
requirements.txt

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ dependencies = [
9595
"redis",
9696
"sqlalchemy",
9797
"psycopg2",
98+
"pymysql",
9899
"livekit-agents[openai]",
99100
"livekit-agents[google]",
100101
"livekit-agents[anthropic]",

siphon/memory/enrichment.py

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
"""Memory enrichment - format memory for prompt injection."""
22

3-
from typing import Optional
3+
from typing import Optional, List, Dict
44
from datetime import datetime
5-
from siphon.memory.models import CallerMemory, MemoryContext
5+
from siphon.memory.models import CallerMemory, MemoryContext, Fact
66
from siphon.config import get_logger
77
from siphon.agent.internal_prompts import memory_aware_prompt
88

@@ -40,33 +40,87 @@ def format(self, memory: Optional[CallerMemory]) -> MemoryContext:
4040
except:
4141
pass
4242

43-
# Format facts
44-
facts_lines = []
43+
# Sort facts by importance
4544
sorted_facts = sorted(memory.facts, key=lambda f: f.importance, reverse=True)
45+
top_facts = sorted_facts[:self.max_facts_in_prompt]
46+
47+
# Categorize facts for better organization
48+
summary_facts = [f for f in top_facts if f.key in ['call_summary', 'conversation_summary']]
49+
action_facts = [f for f in top_facts if f.key in ['next_action', 'follow_up_needed', 'incomplete_task']]
50+
personal_facts = [f for f in top_facts if f.key in ['user_name', 'contact_number', 'email']]
51+
appointment_facts = [f for f in top_facts if 'appointment' in f.key or 'schedule' in f.key]
52+
other_facts = [f for f in top_facts if f.key not in
53+
['call_summary', 'conversation_summary', 'next_action', 'follow_up_needed',
54+
'incomplete_task', 'user_name', 'contact_number', 'email'] and
55+
'appointment' not in f.key and 'schedule' not in f.key]
56+
57+
# Build sections
58+
sections = []
59+
60+
# Header section
61+
sections.append("---")
62+
sections.append("Previous Conversation Context:")
63+
sections.append("")
4664

47-
for fact in sorted_facts[:self.max_facts_in_prompt]:
48-
key = fact.key.replace("_", " ").title()
49-
facts_lines.append(f"- {key}: {fact.value}")
50-
51-
formatted_facts = "\n".join(facts_lines) if facts_lines else ""
52-
53-
# Build full context
54-
lines = [
55-
"---",
56-
"Previous Conversation Context:",
57-
]
58-
5965
if memory.call_count > 1:
6066
if last_call_str:
61-
lines.append(f"This user has called {memory.call_count} times previously. Last call was on {last_call_str}.")
67+
sections.append(f"This user has called {memory.call_count} times previously. Last call was on {last_call_str}.")
6268
else:
63-
lines.append(f"This user has called {memory.call_count} times previously.")
64-
65-
if formatted_facts:
66-
lines.append("Key facts from previous conversations:")
67-
lines.append(formatted_facts)
68-
69-
full_context = "\n".join(lines) if len(lines) > 2 else ""
69+
sections.append(f"This user has called {memory.call_count} times previously.")
70+
sections.append("")
71+
72+
# Summary section (most important for context)
73+
if summary_facts:
74+
sections.append("SUMMARY:")
75+
for fact in summary_facts[:2]: # Max 2 summaries
76+
sections.append(f" {fact.value}")
77+
sections.append("")
78+
79+
# Next Actions section (critical for follow-ups)
80+
if action_facts:
81+
sections.append("NEXT ACTIONS / FOLLOW-UPS:")
82+
for fact in action_facts[:3]: # Max 3 actions
83+
key_display = fact.key.replace("_", " ").title()
84+
sections.append(f" • {key_display}: {fact.value}")
85+
sections.append("")
86+
87+
# Personal Information section
88+
if personal_facts:
89+
sections.append("PERSONAL INFO:")
90+
for fact in personal_facts[:3]:
91+
key_display = fact.key.replace("_", " ").title()
92+
sections.append(f" • {key_display}: {fact.value}")
93+
sections.append("")
94+
95+
# Appointments section
96+
if appointment_facts:
97+
sections.append("APPOINTMENTS:")
98+
for fact in appointment_facts[:3]:
99+
key_display = fact.key.replace("_", " ").title()
100+
sections.append(f" • {key_display}: {fact.value}")
101+
sections.append("")
102+
103+
# Other important facts
104+
if other_facts:
105+
sections.append("OTHER KEY DETAILS:")
106+
for fact in other_facts[:5]: # Max 5 other facts
107+
key_display = fact.key.replace("_", " ").title()
108+
# Truncate long values for readability
109+
value = fact.value
110+
if len(value) > 100:
111+
value = value[:97] + "..."
112+
sections.append(f" • {key_display}: {value}")
113+
sections.append("")
114+
115+
# Join all sections
116+
full_context = "\n".join(sections)
117+
118+
# Also create simple formatted_facts for backward compatibility
119+
formatted_facts_lines = []
120+
for fact in top_facts[:self.max_facts_in_prompt]:
121+
key_display = fact.key.replace("_", " ").title()
122+
formatted_facts_lines.append(f"- {key_display}: {fact.value}")
123+
formatted_facts = "\n".join(formatted_facts_lines)
70124

71125
return MemoryContext(
72126
has_history=True,

0 commit comments

Comments
 (0)