Files
odysseus/tests/test_chat_helpers.py
cirim e7abb7559d fix(research): keep Discuss chats grounded on their report (#4006)
* fix(research): preserve Discuss spin-off primer during context trimming

trim_for_context() kept only system_msgs[:1] as essential and dropped the
rest under budget pressure. A research "Discuss" spin-off seeds the report
as a system message that sits after the preface system messages, so it
landed in extra_system and was the first thing evicted once the chat grew
— the conversation then lost its grounding and drifted off task.

Treat any system message carrying research_spinoff_from metadata as
essential, alongside the leading system prompt, so the seeded report
survives trimming. maybe_compact already retains all system messages.

Tests: tests/test_context_compactor.py::TestResearchPrimerPreserved

* fix(research): ground Discuss spin-off chats on the seeded report

build_chat_context injected global memory (pinned + hybrid-retrieved) and
personal-doc RAG every turn, keyed off the user-level memory_enabled pref
and a request-scoped use_rag flag — never the session. A research spin-off,
whose primer declares the report the sole knowledge base, thus had
unrelated keyword-matched facts pulled in ("wrong data") competing with the
report; its rag=False flag was also ignored (use_rag defaulted on).

Add _session_is_research_spinoff(sess) (detects the primer research_spinoff_from
metadata; handles ChatMessage and dict forms) and, for such sessions,
disable memory injection and force RAG off.

Tests: tests/test_chat_helpers.py spin-off detection cases

---------

Co-authored-by: Dan (cirim) <claude@cirim.org>
2026-06-15 20:31:57 +09:00

265 lines
8.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pytest
from fastapi import HTTPException
from routes.chat_helpers import (
_enforce_chat_privileges,
clean_thinking_for_save,
needs_auto_name,
save_assistant_response,
)
class _AuthManager:
def __init__(self, privileges):
self._privileges = privileges
def get_privileges(self, username):
assert username == "alice"
return self._privileges
class _Request:
def __init__(self, privileges):
self.app = type("App", (), {})()
self.app.state = type("State", (), {"auth_manager": _AuthManager(privileges)})()
class _Session:
def __init__(self, model):
self.model = model
def test_allowed_models_legacy_empty_list_remains_unrestricted(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
_enforce_chat_privileges(
_Request({"allowed_models": [], "max_messages_per_day": 0}),
_Session("provider/model-a"),
)
def test_allowed_models_explicit_empty_restricted_list_blocks_all_models(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
with pytest.raises(HTTPException) as exc:
_enforce_chat_privileges(
_Request({
"allowed_models": [],
"allowed_models_restricted": True,
"max_messages_per_day": 0,
}),
_Session("provider/model-a"),
)
assert exc.value.status_code == 403
assert "provider/model-a" in exc.value.detail
def test_allowed_models_nonempty_list_still_restricts_without_new_flag(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
_enforce_chat_privileges(
_Request({"allowed_models": ["provider/model-a"], "max_messages_per_day": 0}),
_Session("provider/model-a"),
)
with pytest.raises(HTTPException):
_enforce_chat_privileges(
_Request({"allowed_models": ["provider/model-a"], "max_messages_per_day": 0}),
_Session("provider/model-b"),
)
def test_no_restriction_allows_any_model(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
privs = {"allowed_models": [], "block_all_models": False, "max_messages_per_day": 0}
_enforce_chat_privileges(_Request(privs), _Session("provider/model-a"))
_enforce_chat_privileges(_Request(privs), _Session("provider/model-z"))
def test_specific_allowlist_blocks_models_outside_it(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
privs = {
"allowed_models": ["gpt-4"],
"block_all_models": False,
"max_messages_per_day": 0,
}
_enforce_chat_privileges(_Request(privs), _Session("gpt-4"))
with pytest.raises(HTTPException) as exc:
_enforce_chat_privileges(_Request(privs), _Session("gpt-3.5"))
assert exc.value.status_code == 403
def test_block_all_models_blocks_regardless_of_allowed_models_contents(monkeypatch):
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "alice")
# Even if allowed_models contains entries, block_all_models wins.
privs = {
"allowed_models": ["gpt-4", "gpt-3.5"],
"block_all_models": True,
"max_messages_per_day": 0,
}
with pytest.raises(HTTPException) as exc:
_enforce_chat_privileges(_Request(privs), _Session("gpt-4"))
assert exc.value.status_code == 403
with pytest.raises(HTTPException):
_enforce_chat_privileges(_Request(privs), _Session("anything-else"))
def test_admin_user_is_never_blocked(monkeypatch):
from core.auth import ADMIN_PRIVILEGES
monkeypatch.setattr("routes.chat_helpers.get_current_user", lambda request: "admin")
class _AdminAuthManager:
def get_privileges(self, username):
assert username == "admin"
return dict(ADMIN_PRIVILEGES)
class _AdminRequest:
def __init__(self):
self.app = type("App", (), {})()
self.app.state = type("State", (), {"auth_manager": _AdminAuthManager()})()
_enforce_chat_privileges(_AdminRequest(), _Session("provider/model-a"))
_enforce_chat_privileges(_AdminRequest(), _Session("anything-else"))
class _FakeSession:
def __init__(self, model="selected-model"):
self.model = model
self.history = []
def add_message(self, message):
self.history.append(message)
@pytest.mark.parametrize("name,expected", [
# 24h format (the bug this PR fixes)
("deepseek-v4-flash 14:05:33", True),
("qwq 17:46:02", True),
("gemma3 23:59:59", True),
("claude-sonnet 4 0:00:00", True),
# 12h format (was already working)
("deepseek-v4-flash 2:05:33 PM", True),
("qwq 06:46:02 AM", True),
("claude-sonnet-4 8:05:17 am", True),
# empty / default
("", True),
(" ", False),
("Chat: something", True),
# custom titles should NOT trigger auto-naming
("custom title", False),
("CW Decoder for STM32", False),
("my chat about python", False),
("Fix the login bug", False),
])
def test_needs_auto_name(name, expected):
assert needs_auto_name(name) == expected, f"needs_auto_name({name!r}) should be {expected}"
def test_clean_thinking_for_save_extracts_gemma4_thought_channel():
content, metadata = clean_thinking_for_save(
"<|channel>thought\ninternal reasoning<channel|>Final answer.",
{"model": "google/gemma-4-31B-it"},
)
assert content == "Final answer."
assert metadata["thinking"] == "internal reasoning"
assert metadata["model"] == "google/gemma-4-31B-it"
def test_clean_thinking_for_save_strips_empty_gemma4_thought_channel():
content, metadata = clean_thinking_for_save(
"<|channel>thought\n<channel|>Final answer.",
{"model": "google/gemma-4-31B-it"},
)
assert content == "Final answer."
assert "thinking" not in metadata
def test_clean_thinking_for_save_unwraps_gemma4_response_channel():
content, metadata = clean_thinking_for_save(
"<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>",
{"model": "google/gemma-4-31B-it"},
)
assert content == "Final answer."
assert metadata["thinking"] == "internal reasoning"
def test_clean_thinking_for_save_extracts_thought_tag():
content, metadata = clean_thinking_for_save(
"<thought>internal reasoning</thought>Final answer.",
{},
)
assert content == "Final answer."
assert metadata["thinking"] == "internal reasoning"
def test_save_assistant_response_preserves_actual_and_requested_model():
sess = _FakeSession("selected-model")
save_assistant_response(
sess,
session_manager=None,
session_id="s1",
full_response="hello",
last_metrics={"model": "actual-model", "input_tokens": 1, "output_tokens": 2},
incognito=True,
)
assert sess.history[-1].metadata["requested_model"] == "selected-model"
assert sess.history[-1].metadata["model"] == "actual-model"
from types import SimpleNamespace
from routes.chat_helpers import _session_is_research_spinoff
class _SpinMsg:
def __init__(self, role, metadata=None):
self.role = role
self.metadata = metadata
def test_spinoff_detected_from_chatmessage_history():
sess = SimpleNamespace(history=[
_SpinMsg("system", {"research_spinoff_from": "rp-1"}),
_SpinMsg("user", None),
])
assert _session_is_research_spinoff(sess) is True
def test_spinoff_detected_from_dict_history():
sess = SimpleNamespace(history=[
{"role": "system", "metadata": {"research_spinoff_from": "rp-2"}},
{"role": "user", "content": "hi"},
])
assert _session_is_research_spinoff(sess) is True
def test_non_spinoff_plain_session_is_false():
sess = SimpleNamespace(history=[
_SpinMsg("system", {"compacted": True}),
_SpinMsg("user", None),
])
assert _session_is_research_spinoff(sess) is False
def test_metadata_on_non_system_message_ignored():
sess = SimpleNamespace(history=[_SpinMsg("user", {"research_spinoff_from": "rp-3"})])
assert _session_is_research_spinoff(sess) is False
def test_empty_or_missing_history():
assert _session_is_research_spinoff(SimpleNamespace(history=[])) is False
assert _session_is_research_spinoff(SimpleNamespace()) is False