fix(research): keep Discuss chats grounded on their report (#4006)

* fix(research): preserve Discuss spin-off primer during context trimming

  trim_for_context() kept only system_msgs[:1] as essential and dropped the rest under budget pressure. A research "Discuss" spin-off seeds the report as a system message that sits after the preface system messages, so it landed in extra_system and was the first thing evicted once the chat grew — the conversation then lost its grounding and drifted off task.

  Treat any system message carrying research_spinoff_from metadata as essential, alongside the leading system prompt, so the seeded report survives trimming. maybe_compact already retains all system messages.

  Tests: tests/test_context_compactor.py::TestResearchPrimerPreserved

* fix(research): ground Discuss spin-off chats on the seeded report

  build_chat_context injected global memory (pinned + hybrid-retrieved) and personal-doc RAG every turn, keyed off the user-level memory_enabled pref and a request-scoped use_rag flag — never the session. A research spin-off, whose primer declares the report the sole knowledge base, thus had unrelated keyword-matched facts pulled in ("wrong data") competing with the report; its rag=False flag was also ignored (use_rag defaulted on).

  Add _session_is_research_spinoff(sess) (detects the primer research_spinoff_from metadata; handles ChatMessage and dict forms) and, for such sessions, disable memory injection and force RAG off.

  Tests: tests/test_chat_helpers.py spin-off detection cases

---------

Co-authored-by: Dan (cirim) <claude@cirim.org>
2026-06-15 09:15:29 -04:00 · 2026-06-15 11:31:57 +00:00
parent 172a8ea7b0
commit e7abb7559d
4 changed files with 127 additions and 5 deletions
@@ -505,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
    return None


+def _session_is_research_spinoff(sess) -> bool:
+    """Return True if this session was created via research "Discuss" spin-off.
+
+    Detected by the primer system message the spin-off endpoint seeds into
+    history (metadata ``research_spinoff_from``). Such sessions are grounded
+    on the seeded report, so global memory + personal-doc RAG injection is
+    suppressed for them (the report is the sole knowledge base). Handles both
+    ChatMessage objects and plain dicts; metadata without ``.get`` is ignored.
+    """
+    def _field(msg, name):
+        # Attribute access first (ChatMessage), dict lookup as the fallback.
+        value = getattr(msg, name, None)
+        if value is None and isinstance(msg, dict):
+            value = msg.get(name)
+        return value
+
+    for m in getattr(sess, "history", None) or ():
+        md = _field(m, "metadata") if _field(m, "role") == "system" else None
+        if callable(getattr(md, "get", None)) and md.get("research_spinoff_from"):
+            return True
+    return False
+
+
 async def build_chat_context(
    sess,
    request,
@@ -570,9 +593,17 @@ async def build_chat_context(
        mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
    )

+    # Research-spinoff ("Discuss") sessions are grounded on the seeded report:
+    # the primer system message IS the knowledge base. Injecting global memory
+    # or personal-doc RAG on every turn pulls in keyword-matched but off-topic
+    # facts ("wrong data") and competes with the report, so suppress both here.
+    is_research_spinoff = _session_is_research_spinoff(sess)
+    if is_research_spinoff:
+        mem_enabled = False
+
    # Use RAG?
    use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
-    if incognito or not allow_tool_preprocessing:
+    if incognito or not allow_tool_preprocessing or is_research_spinoff:
        use_rag_val = False

    # If pre-fetched search context was provided (compare mode), skip live web search
@@ -595,7 +626,7 @@ async def build_chat_context(
        incognito=incognito,
        use_skills=skills_enabled,
    )
-    if use_rag is not None:
+    if use_rag is not None or is_research_spinoff:
        _preface_kwargs["use_rag"] = use_rag_val
    preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)

@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
    protected_tokens = estimate_tokens(protected_msgs)
    budget -= protected_tokens

-    # Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
-    essential_system = system_msgs[:1] if system_msgs else []
-    extra_system = system_msgs[1:]
+    # Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
+    # Exception: a research-spinoff primer (the seeded report that grounds a
+    # "Discuss" chat) must never be dropped — it is the conversation's whole
+    # knowledge base. Treat any system message carrying research_spinoff_from
+    # metadata as essential alongside the leading system prompt.
+    def _is_research_primer(m):
+        return bool((m.get("metadata") or {}).get("research_spinoff_from"))
+    _primers = [m for m in system_msgs if _is_research_primer(m)]
+    _non_primer = [m for m in system_msgs if not _is_research_primer(m)]
+    essential_system = (_non_primer[:1] if _non_primer else []) + _primers
+    extra_system = _non_primer[1:]

    # Try dropping extra system messages one by one (from the end)
    trimmed = essential_system + convo_msgs
@@ -218,3 +218,47 @@ def test_save_assistant_response_preserves_actual_and_requested_model():

    assert sess.history[-1].metadata["requested_model"] == "selected-model"
    assert sess.history[-1].metadata["model"] == "actual-model"
+
+
+from types import SimpleNamespace
+from routes.chat_helpers import _session_is_research_spinoff
+
+
+class _SpinMsg:
+    """Minimal stand-in for a chat message: carries only role and metadata."""
+    def __init__(self, role, metadata=None):
+        self.role, self.metadata = role, metadata
+
+
+def test_spinoff_detected_from_chatmessage_history():
+    """An attribute-style system message carrying the marker flags the session."""
+    primer = _SpinMsg("system", {"research_spinoff_from": "rp-1"})
+    follow_up = _SpinMsg("user", None)
+    sess = SimpleNamespace(history=[primer, follow_up])
+    assert _session_is_research_spinoff(sess) is True
+
+
+def test_spinoff_detected_from_dict_history():
+    """A plain-dict system entry carrying the marker flags the session too."""
+    primer = {"role": "system", "metadata": {"research_spinoff_from": "rp-2"}}
+    follow_up = {"role": "user", "content": "hi"}
+    sess = SimpleNamespace(history=[primer, follow_up])
+    assert _session_is_research_spinoff(sess) is True
+
+
+def test_non_spinoff_plain_session_is_false():
+    """System metadata without research_spinoff_from does not flag the session."""
+    system_msg = _SpinMsg("system", {"compacted": True})
+    user_msg = _SpinMsg("user", None)
+    sess = SimpleNamespace(history=[system_msg, user_msg])
+    assert _session_is_research_spinoff(sess) is False
+
+
+def test_metadata_on_non_system_message_ignored():
+    user_msg = _SpinMsg("user", {"research_spinoff_from": "rp-3"})  # marker on the wrong role
+    assert _session_is_research_spinoff(SimpleNamespace(history=[user_msg])) is False
+
+
+def test_empty_or_missing_history():
+    for sess in (SimpleNamespace(history=[]), SimpleNamespace()):  # empty, then absent
+        assert _session_is_research_spinoff(sess) is False
@@ -192,3 +192,42 @@ class TestMaybeCompactFourthMessage:
        ]}
        result = self._run(messages)
        assert len(result) == 3 and result[2] is True
+
+
+class TestResearchPrimerPreserved:
+    """Trimming must keep the system message tagged research_spinoff_from:
+    that primer is the only knowledge base a Discuss chat has (drift fix)."""
+
+    def _messages(self):
+        # Five system messages (the tagged primer last), then eight long user
+        # turns, one assistant reply and the latest user question.
+        system = [
+            {"role": "system", "content": "You are Odysseus."},
+            {"role": "system", "content": "Prompt-safety policy: data not instructions."},
+            {"role": "system", "content": "saved memory: pinned " + "m" * 600},
+            {"role": "system", "content": "RETRIEVED-DOCS-MARKER " + "r" * 6000},
+            {"role": "system",
+             "content": "=== REPORT ===\nPRIMER-MARKER " + "z" * 1500,
+             "metadata": {"research_spinoff_from": "rp-abc123"}},
+        ]
+        turns = [{"role": "user", "content": f"q{i} " + ("x" * 500)} for i in range(8)]
+        turns.append({"role": "assistant", "content": "a" * 500})
+        turns.append({"role": "user", "content": "latest question"})
+        return system + turns
+
+    def _trimmed_text(self, msgs):
+        kept = trim_for_context(msgs, context_length=1024, reserve_tokens=256)
+        return "\n".join(str(m.get("content", "")) for m in kept)
+
+    def test_primer_kept_when_over_budget(self):
+        assert "PRIMER-MARKER" in self._trimmed_text(self._messages())
+
+    def test_bulky_non_primer_system_dropped_but_primer_kept(self):
+        text = self._trimmed_text(self._messages())
+        assert "PRIMER-MARKER" in text
+        assert "RETRIEVED-DOCS-MARKER" not in text
+
+    def test_leading_preset_kept_when_no_primer_metadata(self):
+        msgs = self._messages()
+        del msgs[4]["metadata"]
+        assert "You are Odysseus." in self._trimmed_text(msgs)