Files
odysseus/tests/test_compaction_summary_failure.py
Kenny Van de Maele 8f2c8d2dc8 fix(test): tolerate owner kwarg in compaction summary resolve_endpoint mock (#3304)
#2996 made context_compactor call resolve_endpoint('utility', owner=owner),
but the mock added by #2174 stubbed it as lambda which: ..., which rejects the
owner kwarg. Each PR passed alone; merged on dev the two compaction tests fail
with TypeError and the pytest job goes red. Widen the mock to lambda *a, **k.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 17:23:06 +02:00

98 lines
3.8 KiB
Python

"""Regression test for #2160: when the compaction summary LLM call fails,
maybe_compact must return the original messages unchanged, not the older half
dropped. Uses mock imports to avoid loading the full app stack."""
import asyncio
import sys
from unittest.mock import MagicMock
import pytest
# Register MagicMock placeholders for the heavyweight dependencies so the
# imports that follow do not pull in the real packages. A module that is
# already present in sys.modules is left untouched.
for mod in (
    "sqlalchemy",
    "sqlalchemy.orm",
    "sqlalchemy.ext",
    "sqlalchemy.ext.declarative",
    "sqlalchemy.ext.hybrid",
    "sqlalchemy.sql",
    "sqlalchemy.sql.expression",
    "src.database",
    "core.models",
    "core.database",
):
    sys.modules.setdefault(mod, MagicMock())
import src.context_compactor as cc
from src.context_compactor import maybe_compact
class TestCompactionSummaryFailure:
    """A failing summary call must never cost any conversation history.

    When the summary succeeds, maybe_compact swaps the older half of the
    conversation for a summary message. When the summary call raises, it has
    to degrade gracefully and return the original messages list untouched so
    that the next turn (or trim_for_context) can deal with the length.

    Before the fix the except branch returned `system_msgs + recent`, which
    silently threw away the older half while still reporting
    was_compacted=False — so the caller treated a materially shorter list as
    a no-op.
    """

    def _run(self, messages, *, context_length=100):
        """Run maybe_compact on a copy of *messages* with a failing summary.

        Every collaborator that maybe_compact reaches through the
        context_compactor module is replaced for the duration of the call and
        put back afterwards, keeping the test hermetic (no network, no real
        endpoint resolution). Returns whatever maybe_compact returns.
        """

        async def _failing_summary(*args, **kwargs):
            raise RuntimeError("summary model down")

        def _fixed_context_length(url, model):
            return context_length

        def _huge_token_estimate(msgs):
            # Well over the threshold, so compaction is forced to trigger
            # (pct over COMPACT_THRESHOLD).
            return 10000

        def _no_endpoint(*args, **kwargs):
            return (None, None, None)

        def _skip_history_update(*args, **kwargs):
            return None

        stubs = {
            "get_context_length": _fixed_context_length,
            "estimate_tokens": _huge_token_estimate,
            "llm_call_async": _failing_summary,
            "resolve_endpoint": _no_endpoint,
            "_update_session_history": _skip_history_update,
        }

        # Capture every original before patching anything, so a missing
        # attribute cannot leave the module half-patched.
        saved = {attr: getattr(cc, attr) for attr in stubs}
        for attr, stub in stubs.items():
            setattr(cc, attr, stub)

        try:
            pending = maybe_compact(
                session=None,
                endpoint_url="http://local/v1/chat/completions",
                model="local-model",
                messages=list(messages),
                headers={},
            )
            return asyncio.run(pending)
        finally:
            for attr, original in saved.items():
                setattr(cc, attr, original)

    def _history(self):
        """Build a fresh conversation: a preset, three older and three recent turns."""
        turns = (
            ("system", "PRESET"),
            ("user", "OLDER-1"),
            ("assistant", "OLDER-2"),
            ("user", "OLDER-3"),
            ("assistant", "RECENT-1"),
            ("user", "RECENT-2"),
            ("assistant", "RECENT-3"),
        )
        return [{"role": role, "content": text} for role, text in turns]

    def test_returns_original_messages_when_summary_fails(self):
        original = self._history()
        returned, _ctx, was_compacted = self._run(original)

        # The failed attempt must not be reported as a compaction.
        assert was_compacted is False
        # The complete list is handed back as-is, older half included.
        assert returned == original

    def test_older_messages_not_dropped_on_failure(self):
        returned, _ctx, _was = self._run(self._history())
        contents = [message["content"] for message in returned]

        # Every older turn has to outlive the failed summary call.
        missing = [
            older
            for older in ("OLDER-1", "OLDER-2", "OLDER-3")
            if older not in contents
        ]
        assert not missing