fix(memory): only delete memories the model explicitly drops in tidy (#3455)

* fix(memory): only delete memories the model explicitly drops in tidy

The AI memory-tidy path computed deletions as the complement of the model's
`keep` list (`if mid not in keep_ids: continue`). When the model returned a
valid response that simply omitted some existing ids — a common LLM lapse — every
omitted memory was silently deleted, even though it was neither a duplicate nor
listed in `drop`.

Honor the explicit `drop` set instead: delete only ids the model dropped (minus
any it saw only truncated), and preserve everything else, still applying cleaned
text/category from `keep`.

Adds tests/test_consolidate_memory_explicit_drops.py: a memory the model omits
from both keep and drop survives; an explicitly dropped one is removed.

* refactor(memory): remove now-dead keep_ids from tidy

After deletion switched to drop_ids and text/category rewrites to cleaned_by_id,
keep_ids was written but never read. Remove the init, the .add(mid) in the keep
loop, and the truncated .update() (its truncated-protection is already covered by
`drop_ids -= truncated_ids`). Pure deletion, no behavior change; tests stay green.

Addresses review feedback on #3455.

---------

Co-authored-by: Kenny Van de Maele <kenny@kvandemaele.be>
This commit is contained in:
Mazen Tamer Salah
2026-06-08 19:54:45 +03:00
committed by GitHub
parent d458cade98
commit d71284194b
2 changed files with 72 additions and 8 deletions
+15 -8
View File
@@ -168,7 +168,6 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
drop_items = decision.get("drop") if isinstance(decision, dict) else None
if isinstance(keep_items, list) and isinstance(drop_items, list):
by_id = {m.get("id"): m for m in group_memories if m.get("id")}
keep_ids = set()
cleaned_by_id = {}
for item in keep_items:
if not isinstance(item, dict):
@@ -179,7 +178,6 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
text = (item.get("text") or "").strip()
if not text:
continue
keep_ids.add(mid)
cleaned = {
"category": (item.get("category") or by_id[mid].get("category") or "fact").strip(),
}
@@ -188,11 +186,20 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
cleaned["text"] = text
cleaned_by_id[mid] = cleaned
# If the model only saw a truncated memory, do not let
# that partial view delete or rewrite the full memory.
keep_ids.update(mid for mid in truncated_ids if mid in by_id)
# Delete only memories the model EXPLICITLY dropped, never
# ones it merely omitted from `keep`. Treating the
# complement of `keep` as deletions meant a model that
# forgot to re-list an id (common) silently destroyed that
# memory. Honor the explicit `drop` set instead.
drop_ids = {
d.get("id")
for d in drop_items
if isinstance(d, dict) and d.get("id") in by_id
}
# Never delete a memory the model only saw truncated.
drop_ids -= truncated_ids
if keep_ids:
if drop_ids or cleaned_by_id:
changed_text = 0
group_ref_ids = {id(m) for m in group_memories}
kept_all = []
@@ -201,7 +208,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
kept_all.append(mem)
continue
mid = mem.get("id")
if mid not in keep_ids:
if mid in drop_ids:
continue
cleaned = cleaned_by_id.get(mid) or {}
if mid in truncated_ids:
@@ -213,7 +220,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
mem["category"] = cleaned["category"]
kept_all.append(mem)
removed = len(group_memories) - len(keep_ids)
removed = sum(1 for m in group_memories if m.get("id") in drop_ids)
total_scanned += len(group_memories)
if removed or changed_text:
all_memories = kept_all
@@ -0,0 +1,57 @@
"""Memory consolidation must delete only memories the model explicitly drops.
The AI tidy path computed deletions as the complement of the model's `keep`
list, so any memory the model simply omitted (a common LLM lapse) was silently
deleted. The fix honors the explicit `drop` set, so an omitted memory survives.
"""
import asyncio
import json
import src.builtin_actions as ba
class _FakeMM:
saved = None
def __init__(self, *args, **kwargs):
pass
def load_all(self):
return [
{"id": "a", "owner": "alice", "text": "Likes dark roast coffee", "category": "preference"},
{"id": "b", "owner": "alice", "text": "Likes dark roast coffee too", "category": "preference"},
{"id": "c", "owner": "alice", "text": "Lives in Cairo", "category": "fact"},
]
def save(self, entries):
_FakeMM.saved = list(entries)
def test_omitted_memory_survives_only_explicit_drop(monkeypatch):
import src.memory
import src.endpoint_resolver
import src.llm_core
_FakeMM.saved = None
monkeypatch.setattr(src.memory, "MemoryManager", _FakeMM)
monkeypatch.setattr(
src.endpoint_resolver, "resolve_endpoint",
lambda kind, owner=None: ("http://x/v1", "model", {}),
)
async def fake_llm(**kwargs):
# Model keeps 'a', drops 'b', and OMITS 'c' entirely.
return json.dumps({
"keep": [{"id": "a", "text": "Likes dark roast coffee", "category": "preference"}],
"drop": [{"id": "b", "reason": "duplicate of a"}],
})
monkeypatch.setattr(src.llm_core, "llm_call_async", fake_llm)
msg, ok = asyncio.run(ba.action_consolidate_memory("alice"))
assert ok, msg
ids = {m["id"] for m in _FakeMM.saved}
assert "c" in ids, "omitted memory must NOT be deleted"
assert "a" in ids
assert "b" not in ids, "explicitly dropped memory should be removed"