fix(agent): surface early loop-guard stops

2026-06-15 17:25:26 -04:00 · 2026-06-13 17:14:45 +01:00
parent 270b8570fc
commit ff5bcd9864
4 changed files with 193 additions and 77 deletions
@@ -0,0 +1,114 @@
+"""Regression: stream_agent_loop surfaces *why* a guard ended the turn.
+
+Two internal guards used to stop the agent in ways that looked like a clean
+completion or a vague blocked message:
+
+  * the loop-breaker stall detector -> now emits `loop_breaker_triggered`
+  * the intent-without-action nudge cap -> now emits `intent_nudge_exhausted`
+
+These tests run the real loop body against a fake LLM stream (no model calls,
+no sleeps) and assert the structured stop event is emitted.
+"""
+
+import asyncio
+import json
+
+import src.agent_loop as al
+
+
+def _collect(gen):
+    """Drain the async generator *gen* and return its items as a list."""
+    async def _drain():
+        items = []
+        async for item in gen:
+            items.append(item)
+        return items
+
+    return asyncio.run(_drain())
+
+
+def _types(chunks):
+    """Return the decoded JSON payload of every ``data: `` chunk.
+
+    Chunks that do not start with ``data: ``, the ``data: [DONE]`` sentinel,
+    and payloads that are not valid JSON are skipped.
+
+    Args:
+        chunks: Iterable of raw string chunks produced by the loop.
+
+    Returns:
+        A list with one decoded JSON value per parseable ``data: `` chunk,
+        in the order the chunks were seen.
+    """
+    prefix = "data: "
+    out = []
+    for c in chunks:
+        if not c.startswith(prefix) or c.startswith("data: [DONE]"):
+            continue
+        try:
+            out.append(json.loads(c[len(prefix):]))
+        except json.JSONDecodeError:
+            # Best-effort: a malformed payload is ignored, but any other
+            # error is unexpected and should fail the test loudly.
+            continue
+    return out
+
+
+def _patch_common(monkeypatch):
+    """Replace the agent-loop module's settings, MCP, token and tool helpers.
+
+    Each stub is installed on ``al`` with ``raising=False`` so a missing
+    attribute does not make the patch itself fail.
+    """
+    async def _stub_execute(block, *args, **kwargs):
+        # Every tool call reports a successful bash run.
+        return ("bash", {"output": "ok", "exit_code": 0})
+
+    replacements = {
+        "get_setting": lambda key, default=None: default,
+        "get_mcp_manager": lambda: None,
+        "estimate_tokens": lambda *a, **k: 10,
+        "execute_tool_block": _stub_execute,
+    }
+    for attr_name, stub in replacements.items():
+        monkeypatch.setattr(al, attr_name, stub, raising=False)
+
+
+def _run_loop(monkeypatch, round_text, max_rounds, relevant_tools=None):
+    """Run ``al.stream_agent_loop`` against a canned LLM stream.
+
+    Every call to the patched ``stream_llm_with_fallback`` yields one delta
+    chunk carrying *round_text* followed by the ``[DONE]`` sentinel.
+
+    Args:
+        monkeypatch: pytest fixture used to install the fake stream.
+        round_text: Text the fake LLM emits each time it is called.
+        max_rounds: Forwarded to ``stream_agent_loop`` as ``max_rounds``.
+        relevant_tools: Forwarded as ``relevant_tools``; ``None`` (the
+            default) means a fresh ``{"bash"}`` set.
+
+    Returns:
+        The decoded JSON events emitted by the loop, as produced by
+        ``_types``.
+    """
+    if relevant_tools is None:
+        # Build the default per call: a set literal in the signature would be
+        # one object shared by every test that relies on the default.
+        relevant_tools = {"bash"}
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield f'data: {json.dumps({"delta": round_text})}\n\n'
+        yield "data: [DONE]\n\n"
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+
+    gen = al.stream_agent_loop(
+        "http://x/v1", "m",
+        [{"role": "user", "content": "do a long multi-step task"}],
+        max_rounds=max_rounds,
+        relevant_tools=relevant_tools,
+    )
+    return _types(_collect(gen))
+
+
+def test_emits_loop_breaker_triggered_on_repeated_no_progress(monkeypatch):
+    """An identical tool call every round must trip the loop-breaker."""
+    _patch_common(monkeypatch)
+    # The fake LLM repeats the same bash block with no answer text, so the
+    # stuck-round streak keeps growing until the cap is reached.
+    events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=8)
+    triggered = next(
+        (ev for ev in events if ev.get("type") == "loop_breaker_triggered"),
+        None,
+    )
+    assert triggered is not None, events
+    assert triggered["reason"]
+    assert triggered["max_stuck_rounds"] == 4
+    assert triggered["stuck_rounds"] >= 4
+    assert "message" in triggered
+
+
+def test_no_loop_breaker_on_normal_finish(monkeypatch):
+    """A plain-text answer must not produce a loop_breaker_triggered event."""
+    _patch_common(monkeypatch)
+    events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=8)
+    triggered = [ev for ev in events if ev.get("type") == "loop_breaker_triggered"]
+    assert not triggered, events
+
+
+def test_emits_intent_nudge_exhausted_when_cap_reached(monkeypatch):
+    """Announcing an action without a tool call must exhaust the nudge cap."""
+    _patch_common(monkeypatch)
+    # Each round the fake LLM only states an intent and never calls a tool;
+    # once the nudges are spent the turn should end with an explicit
+    # intent_nudge_exhausted event.
+    events = _run_loop(monkeypatch, "Let me check the logs now", max_rounds=5)
+    exhausted = next(
+        (ev for ev in events if ev.get("type") == "intent_nudge_exhausted"),
+        None,
+    )
+    assert exhausted is not None, events
+    assert exhausted["max_nudges"] == 2
+    assert exhausted["nudges"] >= 2
+    assert "message" in exhausted
+
+
+def test_no_intent_nudge_exhausted_on_normal_finish(monkeypatch):
+    """A plain-text answer must not produce an intent_nudge_exhausted event."""
+    _patch_common(monkeypatch)
+    events = _run_loop(monkeypatch, "Here is the complete answer to your question.", max_rounds=5)
+    exhausted = [ev for ev in events if ev.get("type") == "intent_nudge_exhausted"]
+    assert not exhausted, events
+
+
+def test_redacts_sensitive_tool_output_before_surfacing():
+    """Secret values are masked while their labels and normal output remain."""
+    raw = (
+        "password: private-value\n"
+        "api_key=private-key\n"
+        "Authorization: Bearer private-token\n"
+        "normal output"
+    )
+    text = al._redact_sensitive_text(raw)
+
+    # None of the secret values may survive redaction.
+    for secret in ("private-value", "private-key", "private-token"):
+        assert secret not in text
+
+    # Labels stay readable with a placeholder; unrelated output is untouched.
+    for kept in (
+        "password: [redacted]",
+        "api_key=[redacted]",
+        "Authorization: Bearer [redacted]",
+        "normal output",
+    ):
+        assert kept in text