diff --git a/src/agent_loop.py b/src/agent_loop.py
index 7a626fb7d..b358f6a00 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1437,6 +1437,18 @@ def build_active_plan_note(approved_plan: str) -> str:
)
+def _detect_runaway_call(call_freq, threshold=15):
+    """Return the tool name of the first call signature counted at least
+    ``threshold`` times, or ``None`` if there is none. ``call_freq`` is a
+    Counter keyed by ``"{tool_type}:{content[:120]}"``, so only IDENTICAL
+    calls (same tool AND args) share a key; a batch of distinct calls to one
+    tool (e.g. 18 calendar events at once) is NOT flagged as runaway."""
+    for signature, seen in call_freq.items():
+        if seen >= threshold:
+            return signature.split(":", 1)[0] if signature else None
+    return None
+
+
async def stream_agent_loop(
endpoint_url: str,
model: str,
@@ -1774,7 +1786,10 @@ async def stream_agent_loop(
# signatures + consecutive no-text tool rounds to bail early.
_recent_call_sigs = collections.deque(maxlen=6)
_stuck_rounds = 0
- _tool_type_counts: collections.Counter = collections.Counter()
+ # Frequency of each exact call signature (tool + args), for the runaway
+ # backstop. Counting identical repeats — not distinct same-tool calls —
+ # lets a legit batch (e.g. 18 calendar events at once) through.
+ _call_freq: collections.Counter = collections.Counter()
_THINK_RE = re.compile(r'.*?', re.DOTALL | re.IGNORECASE)
_force_answer = False # set by loop-breaker → next round runs with NO tools
# Supervisor: how many times we've nudged the model after it announced
@@ -2221,7 +2236,7 @@ async def stream_agent_loop(
_is_repeat = _sig in _recent_call_sigs
_recent_call_sigs.append(_sig)
for _b in tool_blocks:
- _tool_type_counts[_b.tool_type] += 1
+ _call_freq[f"{_b.tool_type}:{(_b.content or '').strip()[:120]}"] += 1
# "Real" answer text = round text minus blocks. Empty-think
# rounds (just "\n\n" + a tool call) must not read as
# progress, so strip think before checking.
@@ -2232,9 +2247,12 @@ async def stream_agent_loop(
_stuck_rounds += 1
else:
_stuck_rounds = 0
- _runaway = next((t for t, n in _tool_type_counts.items() if n >= 15), None)
+ # Runaway = the SAME exact call repeated an absurd number of times.
+ # Distinct calls to one tool (a real batch) are legitimate work, so we
+ # count identical call signatures, not raw per-tool-type totals.
+ _runaway = _detect_runaway_call(_call_freq)
if _stuck_rounds >= 4 or _runaway:
- reason = (f"calling {_runaway} over and over" if _runaway
+ reason = (f"calling {_runaway} with identical arguments over and over" if _runaway
else "repeating the same tool calls without new progress")
logger.warning(f"[agent] loop-breaker tripped on round {round_num} ({reason}); sig={_sig[:80]!r}")
# The model has been executing tools, so its results are already
diff --git a/tests/test_loop_breaker_runaway.py b/tests/test_loop_breaker_runaway.py
new file mode 100644
index 000000000..dbea4d31f
--- /dev/null
+++ b/tests/test_loop_breaker_runaway.py
@@ -0,0 +1,61 @@
+"""Regression test for the agent loop-breaker's runaway backstop.
+
+A legitimate batch of DISTINCT tool calls (e.g. creating 18 calendar events at
+once) must not be flagged as a runaway loop. Only the SAME exact call repeated
+an absurd number of times is a real runaway. Previously the backstop counted
+per-tool-type totals, so any batch of >=15 distinct calls to one tool was
+aborted and the calls were silently discarded.
+"""
+import sys
+import collections
+from unittest.mock import MagicMock
+
+# Stub heavy deps so importing src.agent_loop stays light. NOTE(review): stubs are never removed here.
+_MOCKED = [  # module names registered as MagicMock stubs in sys.modules
+    'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
+    'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression',
+    'src.database', 'src.agent_tools', 'core.models', 'core.database',
+]
+for _m in _MOCKED:
+    sys.modules.setdefault(_m, MagicMock())  # setdefault keeps any module that is already imported
+
+from src.agent_loop import _detect_runaway_call
+
+
+def _freq(sigs):
+    """Count each call signature in the iterable ``sigs``, giving the Counter
+    shape that ``_detect_runaway_call`` takes as its ``call_freq`` argument.
+    """
+    return collections.Counter(sigs)
+
+
+def test_distinct_batch_is_not_runaway():
+    """The "add 18 birthdays" case: 18 create_event calls with distinct summaries."""
+    call_freq = _freq(f'manage_calendar:{{"action":"create_event","summary":"Birthday {n}"}}'
+                      for n in range(18))
+    assert _detect_runaway_call(call_freq) is None
+
+
+def test_many_distinct_same_tool_is_not_runaway():
+    """30 bash calls that each echo a different number never repeat a signature."""
+    assert _detect_runaway_call(_freq(f'bash:echo {i}' for i in range(30))) is None
+
+
+def test_identical_call_repeated_is_runaway():
+    repeated = collections.Counter({'manage_calendar:{"action":"list_events"}': 15})
+    assert _detect_runaway_call(repeated) == 'manage_calendar'  # 15 repeats, no threshold passed
+
+
+def test_below_threshold_is_not_runaway():
+    just_under = collections.Counter({'bash:ls': 14})  # 14 repeats, no threshold passed
+    assert _detect_runaway_call(just_under) is None
+
+
+def test_threshold_is_configurable():
+    five_repeats = _freq(['web_search:python'] * 5)
+    assert _detect_runaway_call(five_repeats, threshold=5) == 'web_search'  # count == threshold
+    assert _detect_runaway_call(five_repeats, threshold=6) is None  # count is one short
+
+
+def test_empty_is_not_runaway():
+    assert _detect_runaway_call(_freq([])) is None  # no signatures counted at all