mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-16 01:35:36 -04:00
115 lines
4.0 KiB
Python
115 lines
4.0 KiB
Python
"""Regression: stream_agent_loop surfaces *why* a guard ended the turn.
|
|
|
|
Two internal guards used to stop the agent in ways that looked like a clean
|
|
completion or a vague blocked message:
|
|
|
|
* the loop-breaker stall detector -> now emits `loop_breaker_triggered`
|
|
* the intent-without-action nudge cap -> now emits `intent_nudge_exhausted`
|
|
|
|
These tests run the real loop body against a fake LLM stream (no model calls,
|
|
no sleeps) and assert the structured stop event is emitted.
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
|
|
import src.agent_loop as al
|
|
|
|
|
|
def _collect(gen):
|
|
async def _run():
|
|
return [c async for c in gen]
|
|
return asyncio.run(_run())
|
|
|
|
|
|
def _types(chunks):
|
|
out = []
|
|
for c in chunks:
|
|
if c.startswith("data: ") and not c.startswith("data: [DONE]"):
|
|
try:
|
|
out.append(json.loads(c[6:]))
|
|
except Exception:
|
|
pass
|
|
return out
|
|
|
|
|
|
def _patch_common(monkeypatch):
|
|
monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False)
|
|
monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False)
|
|
monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False)
|
|
|
|
async def _fake_exec(block, *a, **k):
|
|
return ("bash", {"output": "ok", "exit_code": 0})
|
|
monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
|
|
|
|
|
|
def _run_loop(monkeypatch, round_text, max_rounds, relevant_tools={"bash"}):
|
|
async def _fake_stream(_candidates, messages, **kwargs):
|
|
yield f'data: {json.dumps({"delta": round_text})}\n\n'
|
|
yield "data: [DONE]\n\n"
|
|
monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
|
|
|
|
gen = al.stream_agent_loop(
|
|
"http://x/v1", "m",
|
|
[{"role": "user", "content": "do a long multi-step task"}],
|
|
max_rounds=max_rounds,
|
|
relevant_tools=relevant_tools,
|
|
)
|
|
return _types(_collect(gen))
|
|
|
|
|
|
def test_emits_loop_breaker_triggered_on_repeated_no_progress(monkeypatch):
|
|
_patch_common(monkeypatch)
|
|
# Same exact tool call every round, no answer text -> stuck-round streak
|
|
# trips the loop-breaker once the cap is reached.
|
|
events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=8)
|
|
lb = [e for e in events if e.get("type") == "loop_breaker_triggered"]
|
|
assert lb, events
|
|
e = lb[0]
|
|
assert e["reason"]
|
|
assert e["max_stuck_rounds"] == 4
|
|
assert e["stuck_rounds"] >= 4
|
|
assert "message" in e
|
|
|
|
|
|
def test_no_loop_breaker_on_normal_finish(monkeypatch):
|
|
_patch_common(monkeypatch)
|
|
events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=8)
|
|
assert not any(e.get("type") == "loop_breaker_triggered" for e in events), events
|
|
|
|
|
|
def test_emits_intent_nudge_exhausted_when_cap_reached(monkeypatch):
|
|
_patch_common(monkeypatch)
|
|
# The model keeps announcing an action with no tool call. After the nudge
|
|
# cap is spent, the turn ends with an explicit intent_nudge_exhausted event.
|
|
events = _run_loop(monkeypatch, "Let me check the logs now", max_rounds=5)
|
|
inx = [e for e in events if e.get("type") == "intent_nudge_exhausted"]
|
|
assert inx, events
|
|
e = inx[0]
|
|
assert e["max_nudges"] == 2
|
|
assert e["nudges"] >= 2
|
|
assert "message" in e
|
|
|
|
|
|
def test_no_intent_nudge_exhausted_on_normal_finish(monkeypatch):
|
|
_patch_common(monkeypatch)
|
|
events = _run_loop(monkeypatch, "Here is the complete answer to your question.", max_rounds=5)
|
|
assert not any(e.get("type") == "intent_nudge_exhausted" for e in events), events
|
|
|
|
|
|
def test_redacts_sensitive_tool_output_before_surfacing():
|
|
text = al._redact_sensitive_text(
|
|
"password: private-value\n"
|
|
"api_key=private-key\n"
|
|
"Authorization: Bearer private-token\n"
|
|
"normal output"
|
|
)
|
|
|
|
assert "private-value" not in text
|
|
assert "private-key" not in text
|
|
assert "private-token" not in text
|
|
assert "password: [redacted]" in text
|
|
assert "api_key=[redacted]" in text
|
|
assert "Authorization: Bearer [redacted]" in text
|
|
assert "normal output" in text
|