fix(memory): only delete memories the model explicitly drops in tidy (#3455)

* fix(memory): only delete memories the model explicitly drops in tidy The AI memory-tidy path computed deletions as the complement of the model's `keep` list (`if mid not in keep_ids: continue`). When the model returned a valid response that simply omitted some existing ids — a common LLM lapse — every omitted memory was silently deleted, even though it was neither a duplicate nor listed in `drop`. Honor the explicit `drop` set instead: delete only ids the model dropped (minus any it saw only truncated), and preserve everything else, still applying cleaned text/category from `keep`. Adds tests/test_consolidate_memory_explicit_drops.py: a memory the model omits from both keep and drop survives; an explicitly dropped one is removed. * refactor(memory): remove now-dead keep_ids from tidy After deletion switched to drop_ids and text/category rewrites to cleaned_by_id, keep_ids was written but never read. Remove the init, the .add(mid) in the keep loop, and the truncated .update() (its truncated-protection is already covered by `drop_ids -= truncated_ids`). Pure deletion, no behavior change; tests stay green. Addresses review feedback on #3455. --------- Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>
2026-06-16 09:45:24 -04:00 · 2026-06-08 19:54:45 +03:00
parent d458cade98
commit d71284194b
2 changed files with 72 additions and 8 deletions
@@ -168,7 +168,6 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                    drop_items = decision.get("drop") if isinstance(decision, dict) else None
                    if isinstance(keep_items, list) and isinstance(drop_items, list):
                        by_id = {m.get("id"): m for m in group_memories if m.get("id")}
-                        keep_ids = set()
                        cleaned_by_id = {}
                        for item in keep_items:
                            if not isinstance(item, dict):
@@ -179,7 +178,6 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                            text = (item.get("text") or "").strip()
                            if not text:
                                continue
-                            keep_ids.add(mid)
                            cleaned = {
                                "category": (item.get("category") or by_id[mid].get("category") or "fact").strip(),
                            }
@@ -188,11 +186,20 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                                cleaned["text"] = text
                            cleaned_by_id[mid] = cleaned

-                        # If the model only saw a truncated memory, do not let
-                        # that partial view delete or rewrite the full memory.
-                        keep_ids.update(mid for mid in truncated_ids if mid in by_id)
+                        # Delete only memories the model EXPLICITLY dropped, never
+                        # ones it merely omitted from `keep`. Treating the
+                        # complement of `keep` as deletions meant a model that
+                        # forgot to re-list an id (common) silently destroyed that
+                        # memory. Honor the explicit `drop` set instead.
+                        drop_ids = {
+                            d.get("id")
+                            for d in drop_items
+                            if isinstance(d, dict) and d.get("id") in by_id
+                        }
+                        # Never delete a memory the model only saw truncated.
+                        drop_ids -= truncated_ids

-                        if keep_ids:
+                        if drop_ids or cleaned_by_id:
                            changed_text = 0
                            group_ref_ids = {id(m) for m in group_memories}
                            kept_all = []
@@ -201,7 +208,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                                    kept_all.append(mem)
                                    continue
                                mid = mem.get("id")
-                                if mid not in keep_ids:
+                                if mid in drop_ids:
                                    continue
                                cleaned = cleaned_by_id.get(mid) or {}
                                if mid in truncated_ids:
@@ -213,7 +220,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                                    mem["category"] = cleaned["category"]
                                kept_all.append(mem)

-                            removed = len(group_memories) - len(keep_ids)
+                            removed = sum(1 for m in group_memories if m.get("id") in drop_ids)
                            total_scanned += len(group_memories)
                            if removed or changed_text:
                                all_memories = kept_all
@@ -0,0 +1,57 @@
+"""Memory consolidation must delete only memories the model explicitly drops.
+
+The AI tidy path computed deletions as the complement of the model's `keep`
+list, so any memory the model simply omitted (a common LLM lapse) was silently
+deleted. The fix honors the explicit `drop` set, so an omitted memory survives.
+"""
+import asyncio
+import json
+
+import src.builtin_actions as ba
+
+
+class _FakeMM:
+    saved = None
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def load_all(self):
+        return [
+            {"id": "a", "owner": "alice", "text": "Likes dark roast coffee", "category": "preference"},
+            {"id": "b", "owner": "alice", "text": "Likes dark roast coffee too", "category": "preference"},
+            {"id": "c", "owner": "alice", "text": "Lives in Cairo", "category": "fact"},
+        ]
+
+    def save(self, entries):
+        _FakeMM.saved = list(entries)
+
+
+def test_omitted_memory_survives_only_explicit_drop(monkeypatch):
+    import src.memory
+    import src.endpoint_resolver
+    import src.llm_core
+
+    _FakeMM.saved = None
+    monkeypatch.setattr(src.memory, "MemoryManager", _FakeMM)
+    monkeypatch.setattr(
+        src.endpoint_resolver, "resolve_endpoint",
+        lambda kind, owner=None: ("http://x/v1", "model", {}),
+    )
+
+    async def fake_llm(**kwargs):
+        # Model keeps 'a', drops 'b', and OMITS 'c' entirely.
+        return json.dumps({
+            "keep": [{"id": "a", "text": "Likes dark roast coffee", "category": "preference"}],
+            "drop": [{"id": "b", "reason": "duplicate of a"}],
+        })
+
+    monkeypatch.setattr(src.llm_core, "llm_call_async", fake_llm)
+
+    msg, ok = asyncio.run(ba.action_consolidate_memory("alice"))
+
+    assert ok, msg
+    ids = {m["id"] for m in _FakeMM.saved}
+    assert "c" in ids, "omitted memory must NOT be deleted"
+    assert "a" in ids
+    assert "b" not in ids, "explicitly dropped memory should be removed"