diff --git a/src/context_compactor.py b/src/context_compactor.py index c87ea4c43..b92c7d752 100644 --- a/src/context_compactor.py +++ b/src/context_compactor.py @@ -381,7 +381,10 @@ async def maybe_compact( ) except Exception as e: logger.error(f"Compaction summary failed: {e}") - return system_msgs + recent, context_length, False + # Degrade gracefully: keep the conversation intact rather than + # silently dropping the older half. was_compacted=False signals the + # caller nothing was summarized; trim_for_context handles length. + return messages, context_length, False summary_msg = { "role": "system", diff --git a/tests/test_compaction_summary_failure.py b/tests/test_compaction_summary_failure.py new file mode 100644 index 000000000..f69f7e58d --- /dev/null +++ b/tests/test_compaction_summary_failure.py @@ -0,0 +1,97 @@ +"""Regression test for #2160: when the compaction summary LLM call fails, +maybe_compact must return the original messages unchanged, not the older half +dropped. Uses mock imports to avoid loading the full app stack.""" + +import asyncio +import sys +from unittest.mock import MagicMock + +import pytest + +# Mock heavy dependencies before importing +for mod in [ + 'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative', + 'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression', + 'src.database', + 'core.models', 'core.database', +]: + if mod not in sys.modules: + sys.modules[mod] = MagicMock() + +import src.context_compactor as cc +from src.context_compactor import maybe_compact + + +class TestCompactionSummaryFailure: + """When the summary call raises, no conversation history may be lost. + + On success maybe_compact replaces the older half with a summary message. + On failure it must degrade gracefully and hand back the original messages + list unchanged, so the next turn (or trim_for_context) can handle length. + Before the fix the except branch returned `system_msgs + recent`, silently + discarding the older half while reporting was_compacted=False — the caller + then treated a materially shorter list as a no-op.""" + + def _run(self, messages, *, context_length=100): + # Force compaction to trigger (pct over COMPACT_THRESHOLD) and make the + # summary call fail, so the except branch runs. Stub everything so the + # test is hermetic (no network, no real endpoint resolution). + orig_ctx = cc.get_context_length + orig_est = cc.estimate_tokens + orig_call = cc.llm_call_async + orig_resolve = cc.resolve_endpoint + orig_update = cc._update_session_history + + async def _boom(*a, **k): + raise RuntimeError("summary model down") + + cc.get_context_length = lambda url, model: context_length + cc.estimate_tokens = lambda msgs: 10000 # well over the threshold + cc.llm_call_async = _boom + cc.resolve_endpoint = lambda which: (None, None, None) + cc._update_session_history = lambda *a, **k: None + try: + return asyncio.run( + maybe_compact( + session=None, + endpoint_url="http://local/v1/chat/completions", + model="local-model", + messages=list(messages), + headers={}, + ) + ) + finally: + cc.get_context_length = orig_ctx + cc.estimate_tokens = orig_est + cc.llm_call_async = orig_call + cc.resolve_endpoint = orig_resolve + cc._update_session_history = orig_update + + def _history(self): + return [ + {"role": "system", "content": "PRESET"}, + {"role": "user", "content": "OLDER-1"}, + {"role": "assistant", "content": "OLDER-2"}, + {"role": "user", "content": "OLDER-3"}, + {"role": "assistant", "content": "RECENT-1"}, + {"role": "user", "content": "RECENT-2"}, + {"role": "assistant", "content": "RECENT-3"}, + ] + + def test_returns_original_messages_when_summary_fails(self): + messages = self._history() + out, _ctx, was_compacted = self._run(messages) + + # Nothing was actually compacted. + assert was_compacted is False + # The full original list comes back unchanged — including the older half. + assert out == messages + + def test_older_messages_not_dropped_on_failure(self): + messages = self._history() + out, _ctx, _was = self._run(messages) + + contents = [m["content"] for m in out] + # The older half must survive the failed summary call. + for older in ("OLDER-1", "OLDER-2", "OLDER-3"): + assert older in contents