mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
fix(context_compactor): return original messages when compaction summary fails (#2174)
On summary LLM call failure, maybe_compact was returning system_msgs+recent (dropping the older half) with was_compacted=False, misleading the caller into thinking the list was unchanged. Return the original messages list unchanged so no history is lost; the next trim_for_context call handles length if needed. Fixes #2160 Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -381,7 +381,10 @@ async def maybe_compact(
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Compaction summary failed: {e}")
|
||||
return system_msgs + recent, context_length, False
|
||||
# Degrade gracefully: keep the conversation intact rather than
|
||||
# silently dropping the older half. was_compacted=False signals the
|
||||
# caller nothing was summarized; trim_for_context handles length.
|
||||
return messages, context_length, False
|
||||
|
||||
summary_msg = {
|
||||
"role": "system",
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
"""Regression test for #2160: when the compaction summary LLM call fails,
|
||||
maybe_compact must return the original messages unchanged, not the older half
|
||||
dropped. Uses mock imports to avoid loading the full app stack."""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Mock heavy dependencies before importing
|
||||
for mod in [
|
||||
'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
|
||||
'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression',
|
||||
'src.database',
|
||||
'core.models', 'core.database',
|
||||
]:
|
||||
if mod not in sys.modules:
|
||||
sys.modules[mod] = MagicMock()
|
||||
|
||||
import src.context_compactor as cc
|
||||
from src.context_compactor import maybe_compact
|
||||
|
||||
|
||||
class TestCompactionSummaryFailure:
|
||||
"""When the summary call raises, no conversation history may be lost.
|
||||
|
||||
On success maybe_compact replaces the older half with a summary message.
|
||||
On failure it must degrade gracefully and hand back the original messages
|
||||
list unchanged, so the next turn (or trim_for_context) can handle length.
|
||||
Before the fix the except branch returned `system_msgs + recent`, silently
|
||||
discarding the older half while reporting was_compacted=False — the caller
|
||||
then treated a materially shorter list as a no-op."""
|
||||
|
||||
def _run(self, messages, *, context_length=100):
|
||||
# Force compaction to trigger (pct over COMPACT_THRESHOLD) and make the
|
||||
# summary call fail, so the except branch runs. Stub everything so the
|
||||
# test is hermetic (no network, no real endpoint resolution).
|
||||
orig_ctx = cc.get_context_length
|
||||
orig_est = cc.estimate_tokens
|
||||
orig_call = cc.llm_call_async
|
||||
orig_resolve = cc.resolve_endpoint
|
||||
orig_update = cc._update_session_history
|
||||
|
||||
async def _boom(*a, **k):
|
||||
raise RuntimeError("summary model down")
|
||||
|
||||
cc.get_context_length = lambda url, model: context_length
|
||||
cc.estimate_tokens = lambda msgs: 10000 # well over the threshold
|
||||
cc.llm_call_async = _boom
|
||||
cc.resolve_endpoint = lambda which: (None, None, None)
|
||||
cc._update_session_history = lambda *a, **k: None
|
||||
try:
|
||||
return asyncio.run(
|
||||
maybe_compact(
|
||||
session=None,
|
||||
endpoint_url="http://local/v1/chat/completions",
|
||||
model="local-model",
|
||||
messages=list(messages),
|
||||
headers={},
|
||||
)
|
||||
)
|
||||
finally:
|
||||
cc.get_context_length = orig_ctx
|
||||
cc.estimate_tokens = orig_est
|
||||
cc.llm_call_async = orig_call
|
||||
cc.resolve_endpoint = orig_resolve
|
||||
cc._update_session_history = orig_update
|
||||
|
||||
def _history(self):
|
||||
return [
|
||||
{"role": "system", "content": "PRESET"},
|
||||
{"role": "user", "content": "OLDER-1"},
|
||||
{"role": "assistant", "content": "OLDER-2"},
|
||||
{"role": "user", "content": "OLDER-3"},
|
||||
{"role": "assistant", "content": "RECENT-1"},
|
||||
{"role": "user", "content": "RECENT-2"},
|
||||
{"role": "assistant", "content": "RECENT-3"},
|
||||
]
|
||||
|
||||
def test_returns_original_messages_when_summary_fails(self):
|
||||
messages = self._history()
|
||||
out, _ctx, was_compacted = self._run(messages)
|
||||
|
||||
# Nothing was actually compacted.
|
||||
assert was_compacted is False
|
||||
# The full original list comes back unchanged — including the older half.
|
||||
assert out == messages
|
||||
|
||||
def test_older_messages_not_dropped_on_failure(self):
|
||||
messages = self._history()
|
||||
out, _ctx, _was = self._run(messages)
|
||||
|
||||
contents = [m["content"] for m in out]
|
||||
# The older half must survive the failed summary call.
|
||||
for older in ("OLDER-1", "OLDER-2", "OLDER-3"):
|
||||
assert older in contents
|
||||
Reference in New Issue
Block a user