mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 18:25:26 -04:00
fix(memory): record dislikes as dislikes, not preferences (#2435)
_fallback_memory_candidates matched both positive (prefer/like/love) and
negative (hate / do not like / don't like) sentiment verbs in one regex
alternation, then formatted every hit as "User prefers {X}.". So
"I hate cilantro" was stored as "User prefers cilantro." -- the inverse of
what the user said. These fallback facts are persisted to memory and later
re-injected into the model's context, so the inverted preference actively
misleads the assistant.
Capture the matched verb and branch on it: negatives become
"User dislikes {X}.", positives stay "User prefers {X}." (still filed under
the existing "preference" category).
Supported by Claude Opus 4.8
Co-authored-by: SurprisedDuck <288741682+SurprisedDuck@users.noreply.github.com>
This commit is contained in:
@@ -192,11 +192,19 @@ def _fallback_memory_candidates(messages) -> list[dict]:
|
||||
if place:
|
||||
add(f"User lives in {place}.", "identity")
|
||||
|
||||
m = re.search(r"\bi (?:prefer|like|love|hate|do not like|don't like)\s+([^.!?\n]{4,100})", text, re.I)
|
||||
m = re.search(r"\bi (prefer|like|love|hate|do not like|don't like)\s+([^.!?\n]{4,100})", text, re.I)
|
||||
if m:
|
||||
preference = _clean_memory_value(m.group(1), 100)
|
||||
preference = _clean_memory_value(m.group(2), 100)
|
||||
if preference:
|
||||
add(f"User prefers {preference}.", "preference")
|
||||
# The same pattern catches likes and dislikes; keep the stored
|
||||
# sentiment faithful instead of recording every match as a
|
||||
# preference ("I hate cilantro" must not become "User prefers
|
||||
# cilantro").
|
||||
verb = m.group(1).lower()
|
||||
if verb in ("hate", "do not like", "don't like"):
|
||||
add(f"User dislikes {preference}.", "preference")
|
||||
else:
|
||||
add(f"User prefers {preference}.", "preference")
|
||||
|
||||
m = re.search(
|
||||
r"\bi (?:(?:want|would like|plan|hope) to|wanna) "
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
"""The fallback memory extractor must not invert dislikes into preferences.
|
||||
|
||||
_fallback_memory_candidates matched both positive (prefer/like/love) and
|
||||
negative (hate/do not like/don't like) sentiment verbs in one alternation but
|
||||
formatted every hit as "User prefers X.", so "I hate cilantro" was stored as
|
||||
"User prefers cilantro" -- the opposite of what the user said, then persisted
|
||||
to memory and re-injected into context. The tests below pin the stored sentiment.
|
||||
"""
|
||||
from services.memory.memory_extractor import _fallback_memory_candidates
|
||||
|
||||
|
||||
def _texts(content):
    """Return the lowercased ``text`` of each fallback candidate.

    ``content`` is wrapped as the sole user message passed to
    ``_fallback_memory_candidates``.
    """
    conversation = [{"role": "user", "content": content}]
    candidates = _fallback_memory_candidates(conversation)
    return [candidate["text"].lower() for candidate in candidates]
|
||||
|
||||
|
||||
def test_dislike_is_not_stored_as_preference():
    """A "hate" statement yields a "dislikes" fact and no "prefers" fact."""
    stored = _texts("I hate cilantro in my food")
    # No candidate may record the hated item as something the user prefers.
    assert all("prefers cilantro" not in fact for fact in stored)
    assert any("dislikes cilantro" in fact for fact in stored)
|
||||
|
||||
|
||||
def test_negated_like_is_not_stored_as_preference():
    """A "don't like" statement yields a "dislikes" fact and no "prefers" fact."""
    stored = _texts("I don't like crowded trains")
    # The negated verb must not be collapsed into a positive preference.
    assert all("prefers crowded" not in fact for fact in stored)
    assert any("dislikes crowded" in fact for fact in stored)
|
||||
|
||||
|
||||
def test_genuine_preference_still_stored():
    """A positive "love" statement is still recorded as a "prefers" fact."""
    stored = _texts("I love spicy ramen noodles")
    assert any("prefers spicy ramen" in fact for fact in stored)
|
||||
Reference in New Issue
Block a user