mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
8f2c8d2dc8
#2996 made context_compactor call resolve_endpoint('utility', owner=owner), but the mock added by #2174 stubbed it as lambda which: ..., which rejects the owner kwarg. Each PR passed alone; merged on dev the two compaction tests fail with TypeError and the pytest job goes red. Widen the mock to lambda *a, **k. Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
98 lines
3.8 KiB
Python
98 lines
3.8 KiB
Python
"""Regression test for #2160: when the compaction summary LLM call fails,
|
|
maybe_compact must return the original messages unchanged, not the older half
|
|
dropped. Uses mock imports to avoid loading the full app stack."""
|
|
|
|
import asyncio
|
|
import sys
|
|
from unittest.mock import MagicMock
|
|
|
|
import pytest
|
|
|
|
# Mock heavy dependencies before importing
|
|
for mod in [
|
|
'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
|
|
'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression',
|
|
'src.database',
|
|
'core.models', 'core.database',
|
|
]:
|
|
if mod not in sys.modules:
|
|
sys.modules[mod] = MagicMock()
|
|
|
|
import src.context_compactor as cc
|
|
from src.context_compactor import maybe_compact
|
|
|
|
|
|
class TestCompactionSummaryFailure:
|
|
"""When the summary call raises, no conversation history may be lost.
|
|
|
|
On success maybe_compact replaces the older half with a summary message.
|
|
On failure it must degrade gracefully and hand back the original messages
|
|
list unchanged, so the next turn (or trim_for_context) can handle length.
|
|
Before the fix the except branch returned `system_msgs + recent`, silently
|
|
discarding the older half while reporting was_compacted=False — the caller
|
|
then treated a materially shorter list as a no-op."""
|
|
|
|
def _run(self, messages, *, context_length=100):
|
|
# Force compaction to trigger (pct over COMPACT_THRESHOLD) and make the
|
|
# summary call fail, so the except branch runs. Stub everything so the
|
|
# test is hermetic (no network, no real endpoint resolution).
|
|
orig_ctx = cc.get_context_length
|
|
orig_est = cc.estimate_tokens
|
|
orig_call = cc.llm_call_async
|
|
orig_resolve = cc.resolve_endpoint
|
|
orig_update = cc._update_session_history
|
|
|
|
async def _boom(*a, **k):
|
|
raise RuntimeError("summary model down")
|
|
|
|
cc.get_context_length = lambda url, model: context_length
|
|
cc.estimate_tokens = lambda msgs: 10000 # well over the threshold
|
|
cc.llm_call_async = _boom
|
|
cc.resolve_endpoint = lambda *a, **k: (None, None, None)
|
|
cc._update_session_history = lambda *a, **k: None
|
|
try:
|
|
return asyncio.run(
|
|
maybe_compact(
|
|
session=None,
|
|
endpoint_url="http://local/v1/chat/completions",
|
|
model="local-model",
|
|
messages=list(messages),
|
|
headers={},
|
|
)
|
|
)
|
|
finally:
|
|
cc.get_context_length = orig_ctx
|
|
cc.estimate_tokens = orig_est
|
|
cc.llm_call_async = orig_call
|
|
cc.resolve_endpoint = orig_resolve
|
|
cc._update_session_history = orig_update
|
|
|
|
def _history(self):
|
|
return [
|
|
{"role": "system", "content": "PRESET"},
|
|
{"role": "user", "content": "OLDER-1"},
|
|
{"role": "assistant", "content": "OLDER-2"},
|
|
{"role": "user", "content": "OLDER-3"},
|
|
{"role": "assistant", "content": "RECENT-1"},
|
|
{"role": "user", "content": "RECENT-2"},
|
|
{"role": "assistant", "content": "RECENT-3"},
|
|
]
|
|
|
|
def test_returns_original_messages_when_summary_fails(self):
|
|
messages = self._history()
|
|
out, _ctx, was_compacted = self._run(messages)
|
|
|
|
# Nothing was actually compacted.
|
|
assert was_compacted is False
|
|
# The full original list comes back unchanged — including the older half.
|
|
assert out == messages
|
|
|
|
def test_older_messages_not_dropped_on_failure(self):
|
|
messages = self._history()
|
|
out, _ctx, _was = self._run(messages)
|
|
|
|
contents = [m["content"] for m in out]
|
|
# The older half must survive the failed summary call.
|
|
for older in ("OLDER-1", "OLDER-2", "OLDER-3"):
|
|
assert older in contents
|