Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit a35a144

Browse files
committed
Address review
Signed-off-by: Edwin Yu <[email protected]>
1 parent d61963e commit a35a144

11 files changed

Lines changed: 157 additions & 25 deletions

File tree

packages/server/server_tests/memmachine_server/episodic_memory/long_term_memory/test_episode_to_event.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,8 +77,6 @@ def test_non_message_episode_uses_null_context():
7777
# Only MESSAGE exists today; this branch will start exercising once
7878
# additional Episode types are introduced. Skip rather than assert
7979
# invariant we can't yet exercise.
80-
import pytest
81-
8280
pytest.skip("Only MESSAGE EpisodeType exists; nothing else to verify yet")
8381
episode = _episode(episode_type=non_message)
8482
event = LongTermMemory._episode_to_event(episode)

packages/server/server_tests/memmachine_server/episodic_memory/long_term_memory/test_event_backend_wiring.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,41 @@ async def test_search_dedupes_by_episode_uid(
226226
assert len(uids) == len(set(uids))
227227

228228

229+
async def test_search_warns_on_index_storage_drift(
230+
long_term_memory,
231+
fake_episode_storage,
232+
episodes,
233+
caplog,
234+
):
235+
"""If the event index references an episode UID that EpisodeStorage no
236+
longer has (index/storage drift), the dropped UID is logged as a warning
237+
and the remaining episodes are still returned."""
238+
import logging
239+
240+
await long_term_memory.add_episodes(episodes)
241+
# Simulate drift: index keeps ep-2's segment, but EpisodeStorage forgets it.
242+
await fake_episode_storage.delete_episodes(["ep-2"])
243+
244+
with caplog.at_level(
245+
logging.WARNING,
246+
logger="memmachine_server.episodic_memory.long_term_memory.long_term_memory",
247+
):
248+
scored = await long_term_memory.search_scored(
249+
"george washington",
250+
num_episodes_limit=3,
251+
)
252+
253+
returned_uids = {ep.uid for _, ep in scored}
254+
assert "ep-2" not in returned_uids
255+
assert returned_uids <= {"ep-1", "ep-3"}
256+
257+
drift_records = [
258+
r for r in caplog.records if "index/storage drift" in r.getMessage()
259+
]
260+
assert drift_records, "expected a drift warning"
261+
assert "ep-2" in drift_records[0].getMessage()
262+
263+
229264
async def test_delete_episodes_removes_from_event_memory(
230265
long_term_memory,
231266
segment_store_partition,

packages/server/server_tests/memmachine_server/episodic_memory/long_term_memory/test_service_locator.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22

33
import re
44

5+
import pytest
6+
57
from memmachine_server.episodic_memory.long_term_memory.service_locator import (
8+
_resolve_user_properties_schema,
69
partition_key_for_session,
710
)
811

@@ -61,3 +64,25 @@ def test_partition_key_empty_string_passthrough():
6164
key = partition_key_for_session("")
6265
assert _is_valid_partition_key(key)
6366
assert len(key) == _PARTITION_KEY_MAX_LEN
67+
68+
69+
def test_resolve_user_properties_schema_accepts_normal_keys():
70+
resolved = _resolve_user_properties_schema({"customer_tier": "str", "score": "int"})
71+
assert resolved == {"customer_tier": str, "score": int}
72+
73+
74+
def test_resolve_user_properties_schema_rejects_underscore_prefixed_keys():
75+
"""`_`-prefixed keys collide with system-defined event fields
76+
(`_episode_uid`, `_session_key`, ...). The merged collection schema is
77+
a dict-spread with user_schema last, so allowing them would silently
78+
overwrite the system slot and may change its declared type."""
79+
with pytest.raises(ValueError, match="reserved"):
80+
_resolve_user_properties_schema({"_episode_uid": "str"})
81+
82+
with pytest.raises(ValueError, match="reserved"):
83+
_resolve_user_properties_schema({"_my_field": "int"})
84+
85+
86+
def test_resolve_user_properties_schema_rejects_unknown_type_name():
87+
with pytest.raises(ValueError, match="unknown type name"):
88+
_resolve_user_properties_schema({"customer_tier": "date"})

packages/server/src/memmachine_server/common/configuration/episodic_config.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,31 @@ def _long_term_memory_backend_discriminator(value: object) -> str:
1818
discriminator, so deserialize as `"declarative"` (the legacy backend).
1919
Code that *creates* new configs is responsible for explicitly setting
2020
`backend="event"` if it wants the new default.
21+
22+
Raises TypeError for inputs that cannot legitimately carry a `backend`
23+
discriminator (e.g. an int) or for non-string `backend` values, instead
24+
of silently coercing them into a declarative parse attempt that would
25+
then fail downstream with a less actionable error.
2126
"""
2227
if isinstance(value, dict):
2328
backend = cast(dict[str, Any], value).get("backend")
24-
else:
29+
elif isinstance(value, BaseModel):
2530
backend = getattr(value, "backend", None)
26-
return backend if isinstance(backend, str) else "declarative"
31+
else:
32+
raise TypeError(
33+
"Cannot determine long-term-memory backend: expected a dict or a "
34+
f"LongTermMemoryConf instance, got {type(value).__name__}: "
35+
f"{value!r}."
36+
)
37+
38+
if backend is None:
39+
return "declarative"
40+
if isinstance(backend, str):
41+
return backend
42+
raise TypeError(
43+
"Long-term-memory `backend` discriminator must be a string or omitted "
44+
f"(legacy default); got {type(backend).__name__}: {backend!r}."
45+
)
2746

2847

2948
def merge_partial_configs[TFull: BaseModel, TPartial: BaseModel](
@@ -317,19 +336,19 @@ def merge(self, other: Self) -> LongTermMemoryConf:
317336
# Event backend: synthesize defaults for sub-configs that the flat partial
318337
# leaves None.
319338
merged = merge_partial_configs(
320-
_force_backend(self, "event"),
321-
_force_backend(other, "event"),
339+
LongTermMemoryConfPartial._force_backend(self, "event"),
340+
LongTermMemoryConfPartial._force_backend(other, "event"),
322341
EventLongTermMemoryConf,
323342
)
324343
return merged
325344

326-
327-
def _force_backend(
328-
partial: LongTermMemoryConfPartial,
329-
backend: Literal["declarative", "event"],
330-
) -> LongTermMemoryConfPartial:
331-
"""Return a copy of the partial with `backend` explicitly set."""
332-
return partial.model_copy(update={"backend": backend})
345+
@staticmethod
346+
def _force_backend(
347+
partial: "LongTermMemoryConfPartial",
348+
backend: Literal["declarative", "event"],
349+
) -> "LongTermMemoryConfPartial":
350+
"""Return a copy of the partial with `backend` explicitly set."""
351+
return partial.model_copy(update={"backend": backend})
333352

334353

335354
class EpisodicMemoryConf(MetricsFactoryIdMixin, YamlSerializableMixin):

packages/server/src/memmachine_server/episodic_memory/long_term_memory/long_term_memory.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Long-term memory facade with declarative + event backends."""
22

33
import datetime
4+
import logging
45
from collections.abc import Iterable
56
from typing import Annotated, Literal, cast
67
from uuid import UUID, uuid4, uuid5
@@ -54,6 +55,8 @@
5455
)
5556
from memmachine_server.episodic_memory.event_memory.segmenter import Segmenter
5657

58+
logger = logging.getLogger(__name__)
59+
5760
# Stable namespace for deterministic Episode.uid -> Event.uuid mapping. Do not
5861
# change without a data migration.
5962
_EVENT_UUID_NAMESPACE = UUID("8c2c0e0a-3a2f-4b9c-9d1f-9b6c2a3a4f7e")
@@ -354,6 +357,20 @@ async def _search_scored_event(
354357
episodes = await self._episode_storage.get_episodes(ordered_uids)
355358
episodes_by_uid: dict[str, Episode] = {ep.uid: ep for ep in episodes}
356359

360+
missing = [uid for uid in ordered_uids if uid not in episodes_by_uid]
361+
if missing:
362+
# Index/storage drift: the event index referenced these episode
363+
# UIDs, but they're absent from EpisodeStorage. The two stores
364+
# are not transactionally linked, so this can happen on partial
365+
# failures during add/delete. Surface it so operators notice;
366+
# continue with whatever did hydrate.
367+
logger.warning(
368+
"search_scored dropped %d episode(s) found in the event index "
369+
"but missing from EpisodeStorage (likely index/storage drift): %s",
370+
len(missing),
371+
missing,
372+
)
373+
357374
return [
358375
(scores_by_uid[uid], episodes_by_uid[uid])
359376
for uid in ordered_uids

packages/server/src/memmachine_server/episodic_memory/long_term_memory/service_locator.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Helpers for building long-term memory from configuration."""
22

33
import hashlib
4+
import logging
45
import re
56

67
from pydantic import InstanceOf
@@ -46,6 +47,8 @@
4647
LongTermMemoryParams,
4748
)
4849

50+
logger = logging.getLogger(__name__)
51+
4952
_EVENT_BACKEND_NAMESPACE = "long_term_memory"
5053

5154
_PARTITION_KEY_RE = re.compile(r"^[a-z0-9_]+$")
@@ -164,23 +167,42 @@ def partition_key_for_session(session_id: str) -> str:
164167
Derive a partition key matching `[a-z0-9_]+` (≤32 chars) from a session id.
165168
166169
If the session_id already satisfies the constraint, use it directly to keep
167-
debug paths legible. Otherwise hash to a stable 32-char hex digest.
170+
debug paths legible. Otherwise hash to a stable 32-char hex digest and emit
171+
a DEBUG log of the original→hashed mapping so operators can correlate
172+
partition keys back to sessions during incident response.
168173
"""
169174
if (
170175
_PARTITION_KEY_RE.match(session_id)
171176
and len(session_id) <= _PARTITION_KEY_MAX_LEN
172177
):
173178
return session_id
174-
return hashlib.sha256(session_id.encode("utf-8")).hexdigest()[
179+
partition_key = hashlib.sha256(session_id.encode("utf-8")).hexdigest()[
175180
:_PARTITION_KEY_MAX_LEN
176181
]
182+
logger.debug(
183+
"partition_key_for_session: hashed session_id %r -> partition_key %r",
184+
session_id,
185+
partition_key,
186+
)
187+
return partition_key
177188

178189

179190
def _resolve_user_properties_schema(
180191
raw: dict[str, str],
181192
) -> dict[str, type[PropertyValue]]:
182193
resolved: dict[str, type[PropertyValue]] = {}
183194
for key, type_name in raw.items():
195+
if key.startswith("_"):
196+
# `_`-prefixed keys are reserved for system-defined event fields
197+
# (`_episode_uid`, `_session_key`, `_producer_id`, ...). Allowing a
198+
# user property to share that namespace would let it overwrite the
199+
# system slot in the merged collection schema (dict-spread is last-
200+
# wins) and silently change its declared type.
201+
raise ValueError(
202+
f"Property {key!r}: keys starting with '_' are reserved for "
203+
"system-defined event fields and cannot be used as user "
204+
"property names."
205+
)
184206
prop_type = PROPERTY_TYPE_NAME_TO_PROPERTY_TYPE.get(type_name)
185207
if prop_type is None:
186208
raise ValueError(f"Property {key!r}: unknown type name {type_name!r}")

packages/server/src/memmachine_server/installation/configuration_wizard.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,11 @@ def vector_store_id(self) -> str:
233233
"(requires host SQLite with loadable-extension support)"
234234
)
235235

236+
# default_choice is always one of `valid` (constructed above). In
237+
# silent mode (`self.prompt == False`), `ask_for` returns this default
238+
# unchanged, so the first iteration always satisfies `raw in valid`
239+
# and breaks — no second iteration, no need for a silent-mode fallback.
240+
assert default_choice in valid
236241
choice = default_choice
237242
while True:
238243
raw = (
@@ -246,9 +251,6 @@ def vector_store_id(self) -> str:
246251
if raw in valid:
247252
choice = raw
248253
break
249-
if not self.prompt:
250-
# Silent mode: ask_for always returns the default, which is valid.
251-
break
252254
logger.info(
253255
"Invalid choice %r. Pick one of: %s (or press Enter for the default).",
254256
raw,

packages/server/src/memmachine_server/main/memmachine.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -186,15 +186,24 @@ def _resolve_ltm_defaults(self) -> None:
186186
if ltm.reranker is None:
187187
try:
188188
ltm.reranker = self._conf.default_long_term_memory_reranker
189-
except ConfigurationError:
189+
except Exception:
190190
ltm.reranker = None
191191

192192
# vector_store + segment_store are required to materialize an
193193
# event-backed long-term memory. If either is missing, disable.
194-
if ltm.vector_store is None or ltm.segment_store is None:
194+
missing = [
195+
name
196+
for name, value in (
197+
("vector_store", ltm.vector_store),
198+
("segment_store", ltm.segment_store),
199+
)
200+
if value is None
201+
]
202+
if missing:
195203
self._disable_long_term_memory(
196204
"Event-backed long-term memory requires both vector_store "
197-
"and segment_store; disabling long-term episodic memory."
205+
f"and segment_store; missing: {', '.join(missing)}. "
206+
"Disabling long-term episodic memory."
198207
)
199208

200209
def _disable_long_term_memory(self, warning_message: str) -> None:
@@ -214,7 +223,7 @@ def _resolve_ltm_resource_default(
214223
return current_value
215224
try:
216225
return default_getter()
217-
except ConfigurationError:
226+
except Exception:
218227
self._disable_long_term_memory(missing_warning)
219228
return None
220229

packages/server/src/memmachine_server/server/api_v2/config_service.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ def _handle_backend_change(
181181
"""
182182
if new_backend == ltm.backend:
183183
# Idempotent re-assertion of the same backend; no change to emit.
184-
ltm.backend = new_backend
185184
return []
186185

187186
changes: list[str] = []

sample_configs/episodic_memory_config.gpu.sample

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,11 @@ resources:
100100
# config:
101101
# path: event_vectors.db
102102
# vector_search_engine: usearch # or: hnswlib
103+
# # Persist ANN indexes to disk. If `index_directory` is omitted,
104+
# # indexes are kept in-memory only and rebuilt from the SQLite payload
105+
# # table on every restart — fine for tests, not for production.
103106
# index_directory: event_vector_indexes
104-
# save_threshold: 1000
107+
# save_threshold: 1000 # ops before auto-saving
105108
embedders:
106109
openai_embedder:
107110
provider: openai

0 commit comments

Comments
 (0)