diff --git a/src/agent_loop.py b/src/agent_loop.py
index c87da45b3..0a6d54695 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -755,6 +755,38 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
return ""
+def _strip_think_blocks(text: str) -> str:
+ """Linear-time equivalent of
+ ``re.sub(r'.*?', '', text, flags=DOTALL|IGNORECASE)``.
+
+ The lazy regex rescans to end-of-string from every ```` opener when
+ a closer is missing -> O(n^2) on untrusted model output (prompt injection
+ can echo thousands of openers). This forward-only scan pairs each opener
+ with the next closer in a single pass. Output is byte-for-byte identical to
+ the original narrow regex: only literal ````/```` (any case)
+ are matched, a dangling opener with no closer is left intact, and an orphan
+ ```` is never stripped.
+ """
+ if not text:
+ return text
+ lowered = text.lower()
+ parts = []
+ pos = 0
+ while True:
+ start = lowered.find("", pos)
+ if start == -1:
+ parts.append(text[pos:])
+ break
+ end = lowered.find("", start + 7)
+ if end == -1:
+ # No closer for this opener: lazy regex matches nothing here.
+ parts.append(text[pos:])
+ break
+ parts.append(text[pos:start])
+ pos = end + 8 # len("")
+ return "".join(parts)
+
+
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
_CASUAL_OPENING_RE = re.compile(
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
@@ -1837,7 +1869,7 @@ async def _run_verifier_subagent(
except Exception as e:
logger.warning(f"[agent] verifier subagent failed: {e}")
return []
- raw = re.sub(r".*?", "", raw or "", flags=re.DOTALL | re.IGNORECASE)
+ raw = _strip_think_blocks(raw or "")
last_v = None
for line in raw.splitlines():
if "VERIFICATION:" in line:
@@ -2459,7 +2491,6 @@ async def stream_agent_loop(
# backstop. Counting identical repeats — not distinct same-tool calls —
# lets a legit batch (e.g. 18 calendar events at once) through.
_call_freq: collections.Counter = collections.Counter()
- _THINK_RE = re.compile(r'.*?', re.DOTALL | re.IGNORECASE)
_force_answer = False # set by loop-breaker → next round runs with NO tools
# Supervisor: how many times we've nudged the model after it announced
# an action without emitting the tool call. Capped to prevent a model
@@ -2797,7 +2828,7 @@ async def stream_agent_loop(
if tool_blocks:
logger.info(f"[agent] force-answer round {round_num}: discarding {len(tool_blocks)} ignored tool call(s)")
tool_blocks = []
- if not _THINK_RE.sub("", strip_tool_blocks(round_response)).strip():
+ if not _strip_think_blocks(strip_tool_blocks(round_response)).strip():
# The model burned its budget gathering data but never wrote a
# final answer (common with weaker models on multi-source
# briefings). Salvage it: one blunt non-streaming synthesis call
@@ -2820,7 +2851,7 @@ async def stream_agent_loop(
url=endpoint_url, model=model, messages=_synth_messages,
headers=headers, temperature=0.3, max_tokens=max_tokens, timeout=60,
)
- _synth = _THINK_RE.sub("", strip_tool_blocks(_raw or "")).strip()
+ _synth = _strip_think_blocks(strip_tool_blocks(_raw or "")).strip()
except Exception as _e:
logger.warning(f"[agent] grace synthesis failed: {_e}")
if _synth:
@@ -2882,7 +2913,7 @@ async def stream_agent_loop(
# the model fix them (capped, and it must do new effectful work
# to re-trigger). Skipped on force-answer rounds (no tools to
# fix with), pure Q&A, and when the toggle is off.
- _claimed_done = bool(_THINK_RE.sub("", cleaned_round).strip())
+ _claimed_done = bool(_strip_think_blocks(cleaned_round).strip())
if (_effectful_used and not _force_answer
and _claimed_done
and _verifier_rounds < _VERIFIER_MAX_ROUNDS
@@ -2926,7 +2957,7 @@ async def stream_agent_loop(
# actual tool now") and loop again. Capped at
# _MAX_INTENT_NUDGES so a model that genuinely cannot use the
# tool doesn't pin us in a forever loop.
- _intent_text = _THINK_RE.sub("", cleaned_round).strip()
+ _intent_text = _strip_think_blocks(cleaned_round).strip()
_intent_match = _INTENT_RE.search(_intent_text) if _intent_text else None
# Only nudge when the round REALLY looks like an unfinished
# promise: short response (<400 chars), no fenced code/answer,
@@ -2989,7 +3020,7 @@ async def stream_agent_loop(
# "Real" answer text = round text minus blocks. Empty-think
# rounds (just "\n\n" + a tool call) must not read as
# progress, so strip think before checking.
- _real_text = _THINK_RE.sub("", cleaned_round).strip()
+ _real_text = _strip_think_blocks(cleaned_round).strip()
# Circling = repeating a recent call with nothing written. Any
# progress (a NEW distinct call, or actual answer text) resets it.
if _is_repeat and not _real_text:
diff --git a/tests/test_redos_think_blocks.py b/tests/test_redos_think_blocks.py
new file mode 100644
index 000000000..8e619bfaf
--- /dev/null
+++ b/tests/test_redos_think_blocks.py
@@ -0,0 +1,89 @@
+"""Regression tests for ReDoS in agent_loop's `...` stripping.
+
+CodeQL flagged `py/polynomial-redos` on the lazy `.*?` pattern
+used in `src/agent_loop.py` (one compiled `_THINK_RE`, one inline copy). It is
+applied with `re.sub` over a whole model response. When the closing delimiter
+is missing, the engine rescans to end-of-string from every `` opener ->
+O(n^2) on attacker-influenced input (prompt injection via tool output /
+retrieved content echoed back by the model).
+
+The fix replaces the regex with `_strip_think_blocks`, a forward-only linear
+scan that is byte-for-byte equivalent to the original
+`re.sub(r'.*?', '', text, flags=DOTALL|IGNORECASE)`.
+
+These tests pin BOTH halves:
+ * output is identical to the reference regex for legitimate inputs, and
+ * pathological "many openers, no closer" input completes promptly.
+"""
+
+import re
+import time
+
+from src.agent_loop import _strip_think_blocks
+
+# The exact pattern this fix replaces. Used only as an equivalence oracle on
+# well-formed inputs (never on the adversarial one, where it is the slow path).
+_REFERENCE_RE = re.compile(r".*?", re.DOTALL | re.IGNORECASE)
+
+
+def _reference(text: str) -> str:
+ return _REFERENCE_RE.sub("", text or "")
+
+
+# Loose ceiling: the linear helper finishes in well under 100ms; the vulnerable
+# regex took seconds-to-tens-of-seconds on the same input.
+_BUDGET_S = 4.0
+
+
+# -- equivalence with the original regex -------------------------------------
+
+EQUIV_CASES = [
+ "",
+ "no tags here at all",
+ "hiddenvisible",
+ "beforecotafter",
+ "aonebtwoc",
+ "only",
+ "tail",
+ "anestedrest", # lazy stops at first closer
+ "leadingorphanx", # orphan closer is NOT stripped
+ "trailingno closer for this one", # dangling opener kept verbatim
+ "CASE UP mix x", # case-insensitive
+ "multi\nline\na\nb\nc\nkeep", # DOTALL across newlines
+ "not matched by narrow regex", # only literal
+ "space-in-tag not matched", # literal tag only
+]
+
+
+def test_strip_think_blocks_matches_reference_regex():
+ for case in EQUIV_CASES:
+ assert _strip_think_blocks(case) == _reference(case), repr(case)
+
+
+def test_empty_and_none_safe():
+ assert _strip_think_blocks("") == ""
+ assert _strip_think_blocks(None) in (None, "")
+
+
+# -- ReDoS bound -------------------------------------------------------------
+
+def test_many_openers_no_closer_is_linear():
+ # Attacker echoes thousands of "" with no closer. The lazy regex
+ # rescans to EOS from each opener (O(n^2)); the helper scans once.
+ hostile = "" * 60_000 + "x"
+ start = time.perf_counter()
+ out = _strip_think_blocks(hostile)
+ elapsed = time.perf_counter() - start
+ # No closer anywhere -> nothing is stripped, input returned intact.
+ assert out == hostile
+ assert elapsed < _BUDGET_S, f"took {elapsed:.2f}s (expected linear)"
+
+
+def test_openers_then_one_far_closer_is_linear():
+ hostile = "" * 60_000 + "" + "tail"
+ start = time.perf_counter()
+ out = _strip_think_blocks(hostile)
+ elapsed = time.perf_counter() - start
+ # First opener pairs with the single closer; lazy match spans to it.
+ assert out == "tail"
+ assert elapsed < _BUDGET_S, f"took {elapsed:.2f}s (expected linear)"