fix(agent): surface early loop-guard stops

This commit is contained in:
Alexandre Teixeira
2026-06-13 17:14:45 +01:00
parent 2404b00f18
commit ba73ddcafe
4 changed files with 193 additions and 11 deletions
+60 -11
View File
@@ -11,6 +11,7 @@ import collections
import json
import re
import time
import re
import logging
from typing import AsyncGenerator, List, Dict, Optional, Set
from urllib.parse import urlparse
@@ -38,6 +39,27 @@ from src.agent_tools import (
logger = logging.getLogger(__name__)
_SENSITIVE_BEARER_RE = re.compile(
r"(?i)\b(authorization\s*[:=]\s*bearer\s+)[A-Za-z0-9._~+/=-]+"
)
_SENSITIVE_VALUE_RE = re.compile(
r"(?i)\b(password|passwd|pwd|token|api[_-]?key|secret)\b"
r"(\s*[:=]\s*)"
r"([^\s,;]+)"
)
def _redact_sensitive_text(value: object) -> str:
"""Redact obvious credential values before surfacing tool output."""
if value is None:
return ""
text = str(value)
text = _SENSITIVE_BEARER_RE.sub(r"\1[redacted]", text)
return _SENSITIVE_VALUE_RE.sub(r"\1\2[redacted]", text)
def _load_mcp_disabled_map() -> Dict[str, set]:
"""Load per-server disabled tool sets from the database."""
@@ -1800,6 +1822,7 @@ async def stream_agent_loop(
# signatures + consecutive no-text tool rounds to bail early.
_recent_call_sigs = collections.deque(maxlen=6)
_stuck_rounds = 0
_MAX_STUCK_ROUNDS = 4 # consecutive no-progress rounds before loop-breaker bails
# Frequency of each exact call signature (tool + args), for the runaway
# backstop. Counting identical repeats — not distinct same-tool calls —
# lets a legit batch (e.g. 18 calendar events at once) through.
@@ -2212,13 +2235,13 @@ async def stream_agent_loop(
# promise: short response (<400 chars), no fenced code/answer,
# and an action-intent phrase was matched. Long answers that
# happen to contain "let me know" are not stalls.
_looks_like_promise = (
_promise_shape = (
not guide_only
and _intent_match is not None
and len(_intent_text) < 400
and "```" not in _intent_text
and _intent_nudge_count < _MAX_INTENT_NUDGES
)
_looks_like_promise = _promise_shape and _intent_nudge_count < _MAX_INTENT_NUDGES
if _looks_like_promise:
_intent_nudge_count += 1
_matched_phrase = _intent_match.group(0).strip()
@@ -2238,6 +2261,21 @@ async def stream_agent_loop(
# Visible signal in the stream so the user knows we caught it.
yield f'data: {json.dumps({"type": "agent_step", "round": round_num + 1})}\n\n'
continue
# The model keeps announcing actions it never takes and we've spent
# every nudge — surface why the turn is ending instead of letting it
# look like a clean completion.
if _promise_shape and _intent_nudge_count >= _MAX_INTENT_NUDGES:
_matched_phrase = _intent_match.group(0).strip()
_in_message = (
f"Intent-nudge cap reached on round {round_num}: the model "
f"announced an action ({_matched_phrase!r}) without a tool call "
f"after {_intent_nudge_count} nudge(s); ending the turn."
)
logger.warning(
"[agent] intent-nudge cap exhausted on round %d (%d/%d): %r",
round_num, _intent_nudge_count, _MAX_INTENT_NUDGES, _matched_phrase,
)
yield f'data: {json.dumps({"type": "intent_nudge_exhausted", "round": round_num, "nudges": _intent_nudge_count, "max_nudges": _MAX_INTENT_NUDGES, "message": _in_message})}\n\n'
break # no tools — done
# ── Loop-breaker (Terminus-style stall detector) ──────────────
@@ -2270,10 +2308,21 @@ async def stream_agent_loop(
# Distinct calls to one tool (a real batch) are legitimate work, so we
# count identical call signatures, not raw per-tool-type totals.
_runaway = _detect_runaway_call(_call_freq)
if _stuck_rounds >= 4 or _runaway:
if _stuck_rounds >= _MAX_STUCK_ROUNDS or _runaway:
reason = (f"calling {_runaway} with identical arguments over and over" if _runaway
else "repeating the same tool calls without new progress")
logger.warning(f"[agent] loop-breaker tripped on round {round_num} ({reason}); sig={_sig[:80]!r}")
_lb_message = (
f"Loop-breaker stopped the agent on round {round_num}: {reason}. "
"Forced one tool-free round to converge on an answer or state what's blocked."
)
logger.warning(
"[agent] loop-breaker tripped on round %d (%s); "
"stuck_rounds=%d/%d runaway=%r sig=%r",
round_num, reason, _stuck_rounds, _MAX_STUCK_ROUNDS, _runaway, _sig[:80],
)
# Surface the stop cause to the stream so the user (and journalctl)
# can tell a guard fired, not a clean completion.
yield f'data: {json.dumps({"type": "loop_breaker_triggered", "round": round_num, "reason": reason, "stuck_rounds": _stuck_rounds, "max_stuck_rounds": _MAX_STUCK_ROUNDS, "runaway": _runaway, "message": _lb_message})}\n\n'
# The model has been executing tools, so its results are already
# in context. Force ONE tool-free round to converge: write the
# answer from what it has, or state plainly what's blocking it.
@@ -2492,28 +2541,28 @@ async def stream_agent_loop(
# On a bash/python timeout the result carries error + (often
# empty) stdout/stderr; fall back to the error so the "timed
# out" reason reaches the UI instead of a blank result.
output_text = (result["stdout"] or result["stderr"] or result.get("error", ""))[:2000]
output_text = _redact_sensitive_text(result["stdout"] or result["stderr"] or result.get("error", ""))[:2000]
elif "output" in result:
# bash / python canonical result: {"output": ..., "exit_code": ...}
output_text = (result["output"] or "")[:2000]
output_text = _redact_sensitive_text(result["output"] or "")[:2000]
elif "response" in result:
# AI interaction tools (chat_with_model, send_to_session)
label = result.get("model", result.get("session_name", "AI"))
output_text = f"{label}: {result['response']}"[:4000]
output_text = _redact_sensitive_text(f"{label}: {result['response']}")[:4000]
elif "content" in result:
output_text = result["content"][:2000]
output_text = _redact_sensitive_text(result["content"])[:2000]
elif "results" in result:
output_text = result["results"][:4000]
output_text = _redact_sensitive_text(result["results"])[:4000]
elif "session_id" in result and "name" in result:
output_text = f"Session created: {result['name']} (id: {result['session_id']})"
elif "success" in result:
output_text = (
f"Written: {result.get('path', '')}"
if result["success"]
else f"Error: {result.get('error', '')}"
else f"Error: {_redact_sensitive_text(result.get('error', ''))}"
)
elif "error" in result:
output_text = result["error"][:2000]
output_text = _redact_sensitive_text(result["error"])[:2000]
# Emit tool_output (include ui_event data if present)
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}