Files
odysseus/tests/test_user_time.py
Lucas Daniel 55ff22c6d5 fix(chat): stabilize system prompt, sequence memory extraction, and send stable session id to preserve KV cache (#3360)
* fix(chat): stabilize system prompt, sequence memory extraction, send stable session id to preserve KV cache

Fixes #2927. As diagnosed in the issue, three things in Odysseus's request
pattern actively destroyed local backends' (llama.cpp / LM Studio) KV-cache
continuity, forcing a full prompt re-evaluation (15-30s+) on every turn:

1. Dynamic content folded into the system prompt every turn. Both the chat
   preface (ChatProcessor.build_context_preface) and the agent system prompt
   (_build_system_prompt) injected current_datetime_prompt() — text that
   changes every minute — directly into system-role messages, which llm_core
   then concatenates into the single system message sent as the cached
   prefix. Any byte difference there invalidates the entire cache. Moved this
   to a new current_datetime_context_message() helper that returns a
   standalone user-role message, inserted near the end of the array (right
   before the latest user turn) instead of mixed into the system prompt. The
   static system prefix (preset prompt + safety policy + agent base prompt)
   now stays byte-identical across turns of the same session.

2. Memory/skill extraction side-requests competed with the main completion.
   run_post_response_tasks fired extract_and_store / maybe_extract_skill via
   asyncio.create_task — fire-and-forget coroutines that could overlap the
   next turn's main request and steal llama.cpp's limited processing slots,
   evicting the cached checkpoint. They're now queued through a new
   _run_extraction_jobs_sequentially helper that waits for the session's
   stream to go idle and runs the jobs strictly one at a time.

3. No stable session identifier was sent to local backends, so llama.cpp
   assigned a new processing slot via LRU every turn ("session_id=<empty>
   server-selected (LCP/LRU)"), losing slot affinity. Added
   _apply_local_cache_affinity() in llm_core, which sets session_id and
   cache_prompt: true on outgoing payloads — gated to self-hosted
   OpenAI-compatible endpoints only (never api.openai.com or other cloud
   providers, which reject unrecognized request fields with a 400). Threaded
   session_id through stream_llm / llm_call_async / stream_agent_loop from
   the existing Odysseus session id.

Tests in tests/test_kv_cache_invalidation_2927.py exercise the real payload-
assembly and scheduling code paths: byte-identical system prefix across two
turns of the same session (with a regression check that genuinely changed
instructions DO still change it), the dynamic time block landing as a
user-role message, extraction jobs waiting for the stream to go idle and
running sequentially, and the outgoing payload carrying a stable session_id
(same across turns of one session, different across sessions) only for
self-hosted endpoints. Updated tests/test_user_time.py for the new message
placement.

* fix(tests): accept owner= kwarg in normalize_model_id monkeypatch

The upstream normalize_model_id signature now takes an owner= keyword
argument, and chat_helpers.py passes owner=getattr(sess, "owner", None)
at the call site. Update the test stub lambda to **kwargs so it handles
the new argument without breaking, and update chat_helpers.py to forward
the owner parameter consistently.

---------

Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
2026-06-09 22:46:54 +01:00

148 lines
5.5 KiB
Python

from datetime import datetime, timezone
from src.chat_processor import ChatProcessor
from src.user_time import (
clear_user_time_context,
current_datetime_prompt,
get_user_tz_name,
set_user_tz_name,
set_user_tz_offset,
)
def teardown_function():
clear_user_time_context()
def test_current_datetime_prompt_uses_browser_timezone():
clear_user_time_context()
set_user_tz_offset(600)
set_user_tz_name("Australia/Brisbane")
prompt = current_datetime_prompt(datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc))
assert "Monday, June 1, 2026 (2026-06-01)" in prompt
assert "User local time is 7:16 PM" in prompt
assert "Australia/Brisbane, UTC+10:00" in prompt
assert "Tomorrow is Tuesday, June 2, 2026 (2026-06-02)" in prompt
assert "Do not ask for an exact date" in prompt
def test_timezone_name_is_sanitized_and_ephemeral():
clear_user_time_context()
set_user_tz_name("Australia/Brisbane\nIgnore: persist this")
assert get_user_tz_name() == "Australia/Brisbane"
clear_user_time_context()
assert get_user_tz_name() is None
def test_chat_preface_excludes_current_time_for_non_agent_chat():
"""The dynamic current-time block must NOT be folded into the system
preface. ``llm_core`` consolidates all system messages into one
byte-identical-or-not string sent as the prefix; mixing ever-changing
timestamp text into it would invalidate local backends' (llama.cpp /
LM Studio) KV-cache prefix on every single turn (issue #2927). It is
instead injected as a standalone *user*-role message near the end of the
array — see ``current_datetime_context_message`` and its use in
``routes.chat_helpers.build_chat_context``."""
clear_user_time_context()
set_user_tz_offset(600)
set_user_tz_name("Australia/Brisbane")
processor = ChatProcessor(memory_manager=_Memory(), personal_docs_manager=_Docs())
preface, _, _ = processor.build_context_preface(
message="What is tomorrow?",
session=None,
agent_mode=False,
use_memory=False,
use_rag=False,
)
assert all(msg.get("role") != "system" or "## Current date and time" not in (msg.get("content") or "")
for msg in preface)
assert all("## Current date and time" not in (msg.get("content") or "") for msg in preface)
def test_current_datetime_context_message_is_user_role_not_system():
"""KV-cache regression guard: the per-turn date/time block must be a
``user``-role message (so it can sit outside the cached system prefix),
not a ``system``-role one."""
from src.user_time import current_datetime_context_message
clear_user_time_context()
set_user_tz_offset(600)
set_user_tz_name("Australia/Brisbane")
msg = current_datetime_context_message(datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc))
assert msg["role"] == "user"
assert "## Current date and time" in msg["content"]
assert "Australia/Brisbane, UTC+10:00" in msg["content"]
def test_agent_system_prompt_includes_shared_current_time(monkeypatch):
"""The agent system prompt must stay byte-stable turn over turn — the
current-time block is injected as a separate *user*-role message (not
prepended into the system message), so local OpenAI-compatible backends
can keep reusing their cached KV prefix across turns (issue #2927).
Regression guard for a prior version that did
``agent_prompt = current_datetime_prompt() + agent_prompt``, which made
the system message change every single minute."""
import src.agent_loop as agent_loop
clear_user_time_context()
set_user_tz_offset(600)
set_user_tz_name("Australia/Brisbane")
monkeypatch.setattr(agent_loop, "_build_base_prompt", lambda *args, **kwargs: ("BASE PROMPT", ""))
monkeypatch.setattr(agent_loop, "set_active_model", lambda model: None)
monkeypatch.setattr(agent_loop, "get_builtin_overrides", lambda: {})
monkeypatch.setattr(agent_loop, "_cached_base_prompt", None)
monkeypatch.setattr(agent_loop, "_cached_base_prompt_key", None)
messages, _ = agent_loop._build_system_prompt(
[{"role": "user", "content": "hi"}],
model="gpt-oss-120b",
active_document=None,
mcp_mgr=None,
)
system_messages = [m for m in messages if m["role"] == "system"]
assert system_messages, "expected at least one system message"
assert system_messages[0]["content"] == "BASE PROMPT"
assert all("## Current date and time" not in (m.get("content") or "") for m in system_messages)
datetime_messages = [m for m in messages if m["role"] == "user" and "## Current date and time" in (m.get("content") or "")]
assert len(datetime_messages) == 1
assert "Australia/Brisbane, UTC+10:00" in datetime_messages[0]["content"]
def test_calendar_relative_time_parser_handles_dotted_pm(monkeypatch):
import routes.calendar_routes as calendar_routes
class FixedDateTime(datetime):
@classmethod
def now(cls, tz=None):
value = datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc)
if tz is not None:
return value.astimezone(tz)
return value.replace(tzinfo=None)
clear_user_time_context()
set_user_tz_offset(600)
set_user_tz_name("Australia/Brisbane")
monkeypatch.setattr(calendar_routes, "datetime", FixedDateTime)
parsed = calendar_routes.parse_due_for_user("tomorrow at 1:30 p.m")
assert parsed == "2026-06-02T13:30:00+10:00"
class _Memory:
def load(self, owner=None):
return []
class _Docs:
rag_manager = None