mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
c75d3e1975
_fallback_memory_candidates matched both positive (prefer/like/love) and
negative (hate / do not like / don't like) sentiment verbs in one regex
alternation, then formatted every hit as "User prefers {X}.". So
"I hate cilantro" was stored as "User prefers cilantro." -- the inverse of
what the user said. These fallback facts are persisted to memory and later
re-injected into the model's context, so the inverted preference actively
misleads the assistant.
Capture the matched verb and branch on it: negatives become
"User dislikes {X}.", positives stay "User prefers {X}." (still filed under
the existing "preference" category).
Supported by Claude Opus 4.8
Co-authored-by: SurprisedDuck <288741682+SurprisedDuck@users.noreply.github.com>
32 lines
1.2 KiB
Python
32 lines
1.2 KiB
Python
"""The fallback memory extractor must not invert dislikes into preferences.
|
|
|
|
_fallback_memory_candidates matched both positive (prefer/like/love) and
negative (hate/do not like/don't like) sentiment verbs in one alternation but
formatted every hit as "User prefers X.", so "I hate cilantro" was stored as
"User prefers cilantro" -- the opposite of what the user said, then persisted
to memory and re-injected into context. These tests pin the sentiment.
|
|
"""
|
|
from services.memory.memory_extractor import _fallback_memory_candidates
|
|
|
|
|
|
def _texts(content):
    """Run the fallback extractor on one user message and return its texts.

    ``content`` is wrapped in a single ``{"role": "user", ...}`` message and
    handed to ``_fallback_memory_candidates``; the ``"text"`` field of every
    returned candidate is lower-cased so the tests can match substrings
    without caring about capitalisation.
    """
    conversation = [{"role": "user", "content": content}]
    lowered = []
    for candidate in _fallback_memory_candidates(conversation):
        lowered.append(candidate["text"].lower())
    return lowered
|
|
|
|
|
|
def test_dislike_is_not_stored_as_preference():
    """A 'hate' statement must produce a dislike fact and no preference fact."""
    stored = _texts("I hate cilantro in my food")
    # No candidate may claim the user prefers the thing they said they hate.
    assert all("prefers cilantro" not in fact for fact in stored)
    # At least one candidate must record the dislike.
    assert any("dislikes cilantro" in fact for fact in stored)
|
|
|
|
|
|
def test_negated_like_is_not_stored_as_preference():
    """A negated 'like' ("don't like") must be recorded as a dislike."""
    stored = _texts("I don't like crowded trains")
    # The negation must not be dropped and turned into a preference.
    assert all("prefers crowded" not in fact for fact in stored)
    # At least one candidate must record the dislike.
    assert any("dislikes crowded" in fact for fact in stored)
|
|
|
|
|
|
def test_genuine_preference_still_stored():
    """A positive sentiment verb ('love') must still yield a preference fact."""
    stored = _texts("I love spicy ramen noodles")
    preference_facts = [fact for fact in stored if "prefers spicy ramen" in fact]
    # A non-empty list means at least one candidate carried the preference.
    assert preference_facts
|