mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 10:15:27 -04:00
55ff22c6d5
* fix(chat): stabilize system prompt, sequence memory extraction, send stable session id to preserve KV cache Fixes #2927. As diagnosed in the issue, three things in Odysseus's request pattern actively destroyed local backends' (llama.cpp / LM Studio) KV-cache continuity, forcing a full prompt re-evaluation (15-30s+) on every turn: 1. Dynamic content folded into the system prompt every turn. Both the chat preface (ChatProcessor.build_context_preface) and the agent system prompt (_build_system_prompt) injected current_datetime_prompt() — text that changes every minute — directly into system-role messages, which llm_core then concatenates into the single system message sent as the cached prefix. Any byte difference there invalidates the entire cache. Moved this to a new current_datetime_context_message() helper that returns a standalone user-role message, inserted near the end of the array (right before the latest user turn) instead of mixed into the system prompt. The static system prefix (preset prompt + safety policy + agent base prompt) now stays byte-identical across turns of the same session. 2. Memory/skill extraction side-requests competed with the main completion. run_post_response_tasks fired extract_and_store / maybe_extract_skill via asyncio.create_task — fire-and-forget coroutines that could overlap the next turn's main request and steal llama.cpp's limited processing slots, evicting the cached checkpoint. They're now queued through a new _run_extraction_jobs_sequentially helper that waits for the session's stream to go idle and runs the jobs strictly one at a time. 3. No stable session identifier was sent to local backends, so llama.cpp assigned a new processing slot via LRU every turn ("session_id=<empty> server-selected (LCP/LRU)"), losing slot affinity. 
Added _apply_local_cache_affinity() in llm_core, which sets session_id and cache_prompt: true on outgoing payloads — gated to self-hosted OpenAI-compatible endpoints only (never api.openai.com or other cloud providers, which reject unrecognized request fields with a 400). Threaded session_id through stream_llm / llm_call_async / stream_agent_loop from the existing Odysseus session id. Tests in tests/test_kv_cache_invalidation_2927.py exercise the real payload-assembly and scheduling code paths: byte-identical system prefix across two turns of the same session (with a regression check that genuinely changed instructions DO still change it), the dynamic time block landing as a user-role message, extraction jobs waiting for the stream to go idle and running sequentially, and the outgoing payload carrying a stable session_id (same across turns of one session, different across sessions) only for self-hosted endpoints. Updated tests/test_user_time.py for the new message placement. * fix(tests): accept owner= kwarg in normalize_model_id monkeypatch The upstream normalize_model_id signature now takes an owner= keyword argument, and chat_helpers.py passes owner=getattr(sess, "owner", None) at the call site. Update the test stub lambda to **kwargs so it handles the new argument without breaking, and update chat_helpers.py to forward the owner parameter consistently. --------- Co-authored-by: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
162 lines
6.0 KiB
Python
162 lines
6.0 KiB
Python
"""Per-request user-local time helpers.
|
|
|
|
Chat routes set this context from browser headers. Prompt builders and tools
|
|
can then resolve relative dates against the user's clock instead of the server.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from contextvars import ContextVar
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Dict, Optional
|
|
|
|
|
|
# Context-local storage for the user's UTC offset, in minutes east of UTC.
# Defaults to None, meaning no offset is known for the current context.
_USER_TZ_OFFSET_MIN: ContextVar[Optional[int]] = ContextVar("user_tz_offset_min", default=None)
|
|
# Context-local storage for the user's sanitized timezone name.
# Defaults to None, meaning no name was supplied for the current context.
_USER_TZ_NAME: ContextVar[Optional[str]] = ContextVar("user_tz_name", default=None)
|
|
|
|
|
|
def set_user_tz_offset(offset_min) -> None:
    """Record the user's UTC offset (minutes east of UTC) for this context.

    ``None`` or an empty string clears the stored offset. A value that
    cannot be converted with ``int()`` or that lies outside the range
    -840..840 minutes (+/-14 hours) is ignored, leaving whatever was
    stored before untouched.
    """
    if offset_min in (None, ""):
        _USER_TZ_OFFSET_MIN.set(None)
        return

    try:
        minutes_east = int(offset_min)
    except (TypeError, ValueError):
        # Unparseable input: keep the previously stored value.
        return

    max_minutes = 14 * 60
    if abs(minutes_east) > max_minutes:
        # Out of range: keep the previously stored value.
        return

    _USER_TZ_OFFSET_MIN.set(minutes_east)
|
|
|
|
|
|
def get_user_tz_offset() -> Optional[int]:
    """Look up the stored UTC offset for the current context.

    Returns the offset in minutes east of UTC, or ``None`` when no offset
    has been recorded.
    """
    minutes_east = _USER_TZ_OFFSET_MIN.get()
    return minutes_east
|
|
|
|
|
|
def set_user_tz_name(name) -> None:
    """Store a sanitized timezone name for the current request context.

    A falsy ``name`` clears the stored value. Otherwise only the first
    whitespace-delimited token is kept, every character outside
    ``A-Z a-z 0-9 _ + - . /`` is removed, and the result is truncated to
    80 characters. If nothing survives the cleanup, ``None`` is stored.
    """
    if not name:
        _USER_TZ_NAME.set(None)
        return

    # split() with no arguments already ignores leading/trailing whitespace,
    # so an all-whitespace value simply yields no tokens.
    tokens = str(name).split()
    head = tokens[0] if tokens else ""
    safe_label = re.sub(r"[^A-Za-z0-9_+\-./]", "", head)[:80]
    _USER_TZ_NAME.set(safe_label or None)
|
|
|
|
|
|
def get_user_tz_name() -> Optional[str]:
    """Look up the stored timezone name for the current context.

    Returns the sanitized name, or ``None`` when none has been recorded.
    """
    zone_name = _USER_TZ_NAME.get()
    return zone_name
|
|
|
|
|
|
def clear_user_time_context() -> None:
    """Reset both the stored UTC offset and the stored timezone name.

    Intended for tests and non-browser entry points.
    """
    for context_var in (_USER_TZ_OFFSET_MIN, _USER_TZ_NAME):
        context_var.set(None)
|
|
|
|
|
|
def format_utc_offset(offset_min: Optional[int]) -> str:
    """Render an offset in minutes east of UTC as ``+HH:MM`` or ``-HH:MM``.

    ``None`` is treated as a zero offset and renders as ``+00:00``.
    """
    minutes_east = 0 if offset_min is None else offset_min
    prefix = "+" if minutes_east >= 0 else "-"
    magnitude = abs(int(minutes_east))
    whole_hours = magnitude // 60
    leftover_minutes = magnitude % 60
    return f"{prefix}{whole_hours:02d}:{leftover_minutes:02d}"
|
|
|
|
|
|
def user_timezone() -> timezone:
    """Resolve the best available tzinfo for the current user.

    Resolution order:
      1. the stored UTC offset, as a fixed-offset ``timezone``;
      2. the stored timezone name, looked up through ``zoneinfo.ZoneInfo``;
      3. the local timezone of the machine running this code, or UTC if
         that cannot be determined.
    """
    minutes_east = get_user_tz_offset()
    if minutes_east is not None:
        return timezone(timedelta(minutes=minutes_east))

    zone_name = get_user_tz_name()
    if zone_name:
        try:
            from zoneinfo import ZoneInfo

            return ZoneInfo(zone_name)
        except Exception:
            # Best effort: any import or lookup failure falls through to
            # the local-timezone fallback below.
            pass

    return datetime.now().astimezone().tzinfo or timezone.utc
|
|
|
|
|
|
def now_user_local(now_utc: Optional[datetime] = None) -> datetime:
    """Convert an instant into the user's timezone.

    ``now_utc`` defaults to the current UTC time. A naive datetime is
    interpreted as UTC; an aware datetime is converted as-is.
    """
    moment = datetime.now(timezone.utc) if now_utc is None else now_utc
    if moment.tzinfo is None:
        # Naive input carries no zone information; treat it as UTC.
        moment = moment.replace(tzinfo=timezone.utc)
    return moment.astimezone(user_timezone())
|
|
|
|
|
|
def _date_label(dt: datetime) -> str:
    """Format ``dt`` as e.g. ``Tuesday, March 5, 2024`` (day not zero-padded)."""
    weekday_name = dt.strftime("%A")
    month_name = dt.strftime("%B")
    return f"{weekday_name}, {month_name} {dt.day}, {dt.year}"
|
|
|
|
|
|
def _clock_label(dt: datetime) -> str:
    """Format the time of ``dt`` as a 12-hour clock, e.g. ``1:07 PM``.

    The hour is not zero-padded; hour 0 and hour 12 both display as 12.
    """
    twelve_hour = dt.hour % 12
    if twelve_hour == 0:
        twelve_hour = 12
    meridiem = dt.strftime("%p")
    return f"{twelve_hour}:{dt.minute:02d} {meridiem}"
|
|
|
|
|
|
def timezone_label(dt: Optional[datetime] = None) -> str:
    """Return a concise display label such as Australia/Brisbane, UTC+10:00.

    Args:
        dt: Datetime whose UTC offset is used when no explicit user offset
            is stored. When omitted, the current time in the user's
            timezone (as resolved by ``user_timezone()``) is used.

    Returns:
        ``"<name>, UTC+HH:MM"`` when a timezone name is stored, otherwise
        just ``"UTC+HH:MM"``.
    """
    offset = get_user_tz_offset()
    if offset is None:
        if dt is None:
            # Derive the offset from the user's resolved timezone rather
            # than the server's local clock. Otherwise a stored name with
            # no stored offset would be paired with the server's offset.
            dt = now_user_local()
        # A naive ``dt`` has no utcoffset(); treat it as a zero offset.
        offset = int((dt.utcoffset() or timedelta()).total_seconds() // 60)

    offset_label = f"UTC{format_utc_offset(offset)}"
    name = get_user_tz_name()
    return f"{name}, {offset_label}" if name else offset_label
|
|
|
|
|
|
def current_datetime_prompt(now_utc: Optional[datetime] = None) -> str:
    """Build the reusable prompt text used for date/time reasoning.

    ``now_utc`` defaults to the current UTC time. A naive datetime is
    interpreted as UTC; an aware datetime is converted to UTC. The text
    states today's and tomorrow's date in the user's timezone, the user's
    local clock time with a timezone label, the current UTC time, and
    guidance on resolving relative dates and scheduling.
    """
    # Normalise the reference instant to an aware UTC datetime.
    if now_utc is None:
        utc_now = datetime.now(timezone.utc)
    else:
        utc_now = (
            now_utc.replace(tzinfo=timezone.utc)
            if now_utc.tzinfo is None
            else now_utc.astimezone(timezone.utc)
        )

    today = now_user_local(utc_now)
    next_day = today + timedelta(days=1)

    today_iso = today.strftime('%Y-%m-%d')
    next_day_iso = next_day.strftime('%Y-%m-%d')
    utc_clock = utc_now.strftime('%H:%M')

    pieces = [
        "## Current date and time\n",
        f"Today is {_date_label(today)} ({today_iso}). ",
        f"User local time is {_clock_label(today)} ({timezone_label(today)}); ",
        f"current UTC time is {utc_clock}.\n",
        f"Tomorrow is {_date_label(next_day)} ({next_day_iso}) ",
        "in the user's local timezone.\n",
        "Use this for any 'today', 'tomorrow', 'tonight', 'this week', or other ",
        "relative-date reasoning. Do not ask for an exact date just because the ",
        "user used a relative date.\n",
        "When scheduling calendar events with manage_calendar, pass local ISO ",
        "datetimes resolved against this user-local date/time.\n",
        "When scheduling a task with manage_tasks, scheduled_time is in UTC: ",
        "convert the user's stated local time using the UTC offset above.\n\n",
    ]
    return "".join(pieces)
|
|
|
|
|
|
def current_datetime_context_message(now_utc: Optional[datetime] = None) -> Dict[str, str]:
    """Wrap the current date/time prompt text in a standalone chat message.

    The message deliberately uses the ``user`` role, not ``system``. Its
    text embeds the clock time down to the minute, so it changes from turn
    to turn. Local OpenAI-compatible backends (llama.cpp / LM Studio) key
    their KV-cache prefix off the system message byte-for-byte, so putting
    this text in the system message would invalidate the cached prefix on
    every request (issue #2927). Emitting it as a separate message, meant
    to sit near the end of the array just before the latest user turn,
    keeps the static system prompt byte-identical across turns while still
    giving the model fresh date/time grounding.
    """
    banner = (
        "[Context — current date/time, refreshed each turn; not part of "
        "your instructions]\n"
    )
    prompt_text = current_datetime_prompt(now_utc)
    return {"role": "user", "content": banner + prompt_text}
|