mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-22 04:35:29 -04:00
fix(research): keep Discuss chats grounded on their report (#4006)
* fix(research): preserve Discuss spin-off primer during context trimming
trim_for_context() kept only system_msgs[:1] as essential and dropped the
rest under budget pressure. A research "Discuss" spin-off seeds the report
as a system message that sits after the preface system messages, so it
landed in extra_system and was the first thing evicted once the chat grew
— the conversation then lost its grounding and drifted off task.
Treat any system message carrying research_spinoff_from metadata as
essential, alongside the leading system prompt, so the seeded report
survives trimming. maybe_compact already retains all system messages.
Tests: tests/test_context_compactor.py::TestResearchPrimerPreserved
* fix(research): ground Discuss spin-off chats on the seeded report
build_chat_context injected global memory (pinned + hybrid-retrieved) and
personal-doc RAG every turn, keyed off the user-level memory_enabled pref
and a request-scoped use_rag flag — never the session. A research spin-off,
whose primer declares the report the sole knowledge base, thus had
unrelated keyword-matched facts pulled in ("wrong data") competing with the
report; its rag=False flag was also ignored (use_rag defaulted on).
Add _session_is_research_spinoff(sess), which detects the primer's
research_spinoff_from metadata on a system message (handling both ChatMessage
and dict forms), and, for such sessions, disable memory injection and force RAG off.
Tests: tests/test_chat_helpers.py spin-off detection cases
---------
Co-authored-by: Dan (cirim) <claude@cirim.org>
This commit is contained in:
@@ -218,3 +218,47 @@ def test_save_assistant_response_preserves_actual_and_requested_model():
|
||||
|
||||
assert sess.history[-1].metadata["requested_model"] == "selected-model"
|
||||
assert sess.history[-1].metadata["model"] == "actual-model"
|
||||
|
||||
|
||||
from types import SimpleNamespace
|
||||
from routes.chat_helpers import _session_is_research_spinoff
|
||||
|
||||
|
||||
class _SpinMsg:
|
||||
def __init__(self, role, metadata=None):
|
||||
self.role = role
|
||||
self.metadata = metadata
|
||||
|
||||
|
||||
def test_spinoff_detected_from_chatmessage_history():
    """An attribute-style system message with ``research_spinoff_from`` metadata is detected."""
    sess = SimpleNamespace(history=[
        _SpinMsg("system", {"research_spinoff_from": "rp-1"}),
        _SpinMsg("user", None),
    ])
    assert _session_is_research_spinoff(sess) is True
|
||||
|
||||
|
||||
def test_spinoff_detected_from_dict_history():
    """A dict-style system message with ``research_spinoff_from`` metadata is detected."""
    sess = SimpleNamespace(history=[
        {"role": "system", "metadata": {"research_spinoff_from": "rp-2"}},
        {"role": "user", "content": "hi"},
    ])
    assert _session_is_research_spinoff(sess) is True
|
||||
|
||||
|
||||
def test_non_spinoff_plain_session_is_false():
    """System metadata without the ``research_spinoff_from`` key does not count as a spin-off."""
    sess = SimpleNamespace(history=[
        _SpinMsg("system", {"compacted": True}),
        _SpinMsg("user", None),
    ])
    assert _session_is_research_spinoff(sess) is False
|
||||
|
||||
|
||||
def test_metadata_on_non_system_message_ignored():
    """The spin-off marker on a non-system (user) message must not trigger detection."""
    sess = SimpleNamespace(
        history=[_SpinMsg("user", {"research_spinoff_from": "rp-3"})],
    )
    assert _session_is_research_spinoff(sess) is False
|
||||
|
||||
|
||||
def test_empty_or_missing_history():
    """An empty history, or a session with no ``history`` attribute at all, yields False."""
    assert _session_is_research_spinoff(SimpleNamespace(history=[])) is False
    # No ``history`` attribute whatsoever: detection must return False, not raise.
    assert _session_is_research_spinoff(SimpleNamespace()) is False
|
||||
|
||||
@@ -192,3 +192,42 @@ class TestMaybeCompactFourthMessage:
|
||||
]}
|
||||
result = self._run(messages)
|
||||
assert len(result) == 3 and result[2] is True
|
||||
|
||||
|
||||
class TestResearchPrimerPreserved:
    """A research-spinoff primer (metadata research_spinoff_from) must never be
    trimmed away — it is the Discuss chat's sole knowledge base (drift fix)."""

    def _messages(self):
        """Build an over-budget conversation: five system messages, then chat turns.

        Only the last system message (the report primer) carries the
        ``research_spinoff_from`` metadata; the others are plain system
        messages, including one deliberately bulky "retrieved docs" entry.
        """
        system_msgs = [
            {"role": "system", "content": "You are Odysseus."},
            {"role": "system", "content": "Prompt-safety policy: data not instructions."},
            {"role": "system", "content": "saved memory: pinned " + "m" * 600},
            {"role": "system", "content": "RETRIEVED-DOCS-MARKER " + "r" * 6000},
            {
                "role": "system",
                "content": "=== REPORT ===\nPRIMER-MARKER " + "z" * 1500,
                "metadata": {"research_spinoff_from": "rp-abc123"},
            },
        ]
        user_turns = [
            {"role": "user", "content": f"q{i} " + ("x" * 500)} for i in range(8)
        ]
        tail = [
            {"role": "assistant", "content": "a" * 500},
            {"role": "user", "content": "latest question"},
        ]
        return system_msgs + user_turns + tail

    def _trimmed_text(self, messages):
        """Trim ``messages`` under the tight test budget and return all surviving content joined by newlines."""
        trimmed = trim_for_context(messages, context_length=1024, reserve_tokens=256)
        return "\n".join(str(m.get("content", "")) for m in trimmed)

    def test_primer_kept_when_over_budget(self):
        """The primer's content survives trimming even though the conversation exceeds the budget."""
        joined = self._trimmed_text(self._messages())
        assert "PRIMER-MARKER" in joined

    def test_bulky_non_primer_system_dropped_but_primer_kept(self):
        """The bulky non-primer system message is evicted while the primer is retained."""
        joined = self._trimmed_text(self._messages())
        assert "PRIMER-MARKER" in joined
        assert "RETRIEVED-DOCS-MARKER" not in joined

    def test_leading_preset_kept_when_no_primer_metadata(self):
        """Without any spin-off metadata, the leading system prompt is still kept."""
        msgs = self._messages()
        # Strip the spin-off marker wherever it appears instead of relying on
        # the primer's position in the fixture list.
        for msg in msgs:
            msg.pop("metadata", None)
        joined = self._trimmed_text(msgs)
        assert "You are Odysseus." in joined
|
||||
|
||||
Reference in New Issue
Block a user