mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-30 00:22:10 -04:00
fix(security): prevent ReDoS in agent_loop <think> stripping (#4877)
The lazy `<think>.*?</think>` pattern (one compiled `_THINK_RE`, one inline copy) is applied with `re.sub` over whole model responses. With a `<think>` opener and no closer, the engine rescans to end-of-string from every opener -> O(n^2) on attacker-influenced output (prompt injection can echo thousands of openers via tool output / retrieved content). CodeQL py/polynomial-redos. Replace both with `_strip_think_blocks`, a forward-only linear scan that is byte-for-byte equivalent to the original narrow regex: only literal `<think>`/`</think>` (any case) match, a dangling opener with no closer is left intact, and an orphan `</think>` is never stripped. Routing through the broader `text_helpers.strip_think` was avoided on purpose -- it also strips `<thinking>`, attributes and prompt echoes, which would change what the loop's progress/circling heuristics see. Adds tests/test_redos_think_blocks.py pinning regex-equivalence on a battery of well-formed/edge inputs plus a linear-time bound on hostile input.
This commit is contained in:
+38
-7
@@ -755,6 +755,38 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_think_blocks(text: str) -> str:
|
||||||
|
"""Linear-time equivalent of
|
||||||
|
``re.sub(r'<think>.*?</think>', '', text, flags=DOTALL|IGNORECASE)``.
|
||||||
|
|
||||||
|
The lazy regex rescans to end-of-string from every ``<think>`` opener when
|
||||||
|
a closer is missing -> O(n^2) on untrusted model output (prompt injection
|
||||||
|
can echo thousands of openers). This forward-only scan pairs each opener
|
||||||
|
with the next closer in a single pass. Output is byte-for-byte identical to
|
||||||
|
the original narrow regex: only literal ``<think>``/``</think>`` (any case)
|
||||||
|
are matched, a dangling opener with no closer is left intact, and an orphan
|
||||||
|
``</think>`` is never stripped.
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return text
|
||||||
|
lowered = text.lower()
|
||||||
|
parts = []
|
||||||
|
pos = 0
|
||||||
|
while True:
|
||||||
|
start = lowered.find("<think>", pos)
|
||||||
|
if start == -1:
|
||||||
|
parts.append(text[pos:])
|
||||||
|
break
|
||||||
|
end = lowered.find("</think>", start + 7)
|
||||||
|
if end == -1:
|
||||||
|
# No closer for this opener: lazy regex matches nothing here.
|
||||||
|
parts.append(text[pos:])
|
||||||
|
break
|
||||||
|
parts.append(text[pos:start])
|
||||||
|
pos = end + 8 # len("</think>")
|
||||||
|
return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
|
_LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
|
||||||
_CASUAL_OPENING_RE = re.compile(
|
_CASUAL_OPENING_RE = re.compile(
|
||||||
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
|
r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
|
||||||
@@ -1837,7 +1869,7 @@ async def _run_verifier_subagent(
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"[agent] verifier subagent failed: {e}")
|
logger.warning(f"[agent] verifier subagent failed: {e}")
|
||||||
return []
|
return []
|
||||||
raw = re.sub(r"<think>.*?</think>", "", raw or "", flags=re.DOTALL | re.IGNORECASE)
|
raw = _strip_think_blocks(raw or "")
|
||||||
last_v = None
|
last_v = None
|
||||||
for line in raw.splitlines():
|
for line in raw.splitlines():
|
||||||
if "VERIFICATION:" in line:
|
if "VERIFICATION:" in line:
|
||||||
@@ -2459,7 +2491,6 @@ async def stream_agent_loop(
|
|||||||
# backstop. Counting identical repeats — not distinct same-tool calls —
|
# backstop. Counting identical repeats — not distinct same-tool calls —
|
||||||
# lets a legit batch (e.g. 18 calendar events at once) through.
|
# lets a legit batch (e.g. 18 calendar events at once) through.
|
||||||
_call_freq: collections.Counter = collections.Counter()
|
_call_freq: collections.Counter = collections.Counter()
|
||||||
_THINK_RE = re.compile(r'<think>.*?</think>', re.DOTALL | re.IGNORECASE)
|
|
||||||
_force_answer = False # set by loop-breaker → next round runs with NO tools
|
_force_answer = False # set by loop-breaker → next round runs with NO tools
|
||||||
# Supervisor: how many times we've nudged the model after it announced
|
# Supervisor: how many times we've nudged the model after it announced
|
||||||
# an action without emitting the tool call. Capped to prevent a model
|
# an action without emitting the tool call. Capped to prevent a model
|
||||||
@@ -2797,7 +2828,7 @@ async def stream_agent_loop(
|
|||||||
if tool_blocks:
|
if tool_blocks:
|
||||||
logger.info(f"[agent] force-answer round {round_num}: discarding {len(tool_blocks)} ignored tool call(s)")
|
logger.info(f"[agent] force-answer round {round_num}: discarding {len(tool_blocks)} ignored tool call(s)")
|
||||||
tool_blocks = []
|
tool_blocks = []
|
||||||
if not _THINK_RE.sub("", strip_tool_blocks(round_response)).strip():
|
if not _strip_think_blocks(strip_tool_blocks(round_response)).strip():
|
||||||
# The model burned its budget gathering data but never wrote a
|
# The model burned its budget gathering data but never wrote a
|
||||||
# final answer (common with weaker models on multi-source
|
# final answer (common with weaker models on multi-source
|
||||||
# briefings). Salvage it: one blunt non-streaming synthesis call
|
# briefings). Salvage it: one blunt non-streaming synthesis call
|
||||||
@@ -2820,7 +2851,7 @@ async def stream_agent_loop(
|
|||||||
url=endpoint_url, model=model, messages=_synth_messages,
|
url=endpoint_url, model=model, messages=_synth_messages,
|
||||||
headers=headers, temperature=0.3, max_tokens=max_tokens, timeout=60,
|
headers=headers, temperature=0.3, max_tokens=max_tokens, timeout=60,
|
||||||
)
|
)
|
||||||
_synth = _THINK_RE.sub("", strip_tool_blocks(_raw or "")).strip()
|
_synth = _strip_think_blocks(strip_tool_blocks(_raw or "")).strip()
|
||||||
except Exception as _e:
|
except Exception as _e:
|
||||||
logger.warning(f"[agent] grace synthesis failed: {_e}")
|
logger.warning(f"[agent] grace synthesis failed: {_e}")
|
||||||
if _synth:
|
if _synth:
|
||||||
@@ -2882,7 +2913,7 @@ async def stream_agent_loop(
|
|||||||
# the model fix them (capped, and it must do new effectful work
|
# the model fix them (capped, and it must do new effectful work
|
||||||
# to re-trigger). Skipped on force-answer rounds (no tools to
|
# to re-trigger). Skipped on force-answer rounds (no tools to
|
||||||
# fix with), pure Q&A, and when the toggle is off.
|
# fix with), pure Q&A, and when the toggle is off.
|
||||||
_claimed_done = bool(_THINK_RE.sub("", cleaned_round).strip())
|
_claimed_done = bool(_strip_think_blocks(cleaned_round).strip())
|
||||||
if (_effectful_used and not _force_answer
|
if (_effectful_used and not _force_answer
|
||||||
and _claimed_done
|
and _claimed_done
|
||||||
and _verifier_rounds < _VERIFIER_MAX_ROUNDS
|
and _verifier_rounds < _VERIFIER_MAX_ROUNDS
|
||||||
@@ -2926,7 +2957,7 @@ async def stream_agent_loop(
|
|||||||
# actual tool now") and loop again. Capped at
|
# actual tool now") and loop again. Capped at
|
||||||
# _MAX_INTENT_NUDGES so a model that genuinely cannot use the
|
# _MAX_INTENT_NUDGES so a model that genuinely cannot use the
|
||||||
# tool doesn't pin us in a forever loop.
|
# tool doesn't pin us in a forever loop.
|
||||||
_intent_text = _THINK_RE.sub("", cleaned_round).strip()
|
_intent_text = _strip_think_blocks(cleaned_round).strip()
|
||||||
_intent_match = _INTENT_RE.search(_intent_text) if _intent_text else None
|
_intent_match = _INTENT_RE.search(_intent_text) if _intent_text else None
|
||||||
# Only nudge when the round REALLY looks like an unfinished
|
# Only nudge when the round REALLY looks like an unfinished
|
||||||
# promise: short response (<400 chars), no fenced code/answer,
|
# promise: short response (<400 chars), no fenced code/answer,
|
||||||
@@ -2989,7 +3020,7 @@ async def stream_agent_loop(
|
|||||||
# "Real" answer text = round text minus <think> blocks. Empty-think
|
# "Real" answer text = round text minus <think> blocks. Empty-think
|
||||||
# rounds (just "<think>\n\n</think>" + a tool call) must not read as
|
# rounds (just "<think>\n\n</think>" + a tool call) must not read as
|
||||||
# progress, so strip think before checking.
|
# progress, so strip think before checking.
|
||||||
_real_text = _THINK_RE.sub("", cleaned_round).strip()
|
_real_text = _strip_think_blocks(cleaned_round).strip()
|
||||||
# Circling = repeating a recent call with nothing written. Any
|
# Circling = repeating a recent call with nothing written. Any
|
||||||
# progress (a NEW distinct call, or actual answer text) resets it.
|
# progress (a NEW distinct call, or actual answer text) resets it.
|
||||||
if _is_repeat and not _real_text:
|
if _is_repeat and not _real_text:
|
||||||
|
|||||||
@@ -0,0 +1,89 @@
|
|||||||
|
"""Regression tests for ReDoS in agent_loop's `<think>...</think>` stripping.
|
||||||
|
|
||||||
|
CodeQL flagged `py/polynomial-redos` on the lazy `<think>.*?</think>` pattern
|
||||||
|
used in `src/agent_loop.py` (one compiled `_THINK_RE`, one inline copy). It is
|
||||||
|
applied with `re.sub` over a whole model response. When the closing delimiter
|
||||||
|
is missing, the engine rescans to end-of-string from every `<think>` opener ->
|
||||||
|
O(n^2) on attacker-influenced input (prompt injection via tool output /
|
||||||
|
retrieved content echoed back by the model).
|
||||||
|
|
||||||
|
The fix replaces the regex with `_strip_think_blocks`, a forward-only linear
|
||||||
|
scan that is byte-for-byte equivalent to the original
|
||||||
|
`re.sub(r'<think>.*?</think>', '', text, flags=DOTALL|IGNORECASE)`.
|
||||||
|
|
||||||
|
These tests pin BOTH halves:
|
||||||
|
* output is identical to the reference regex for legitimate inputs, and
|
||||||
|
* pathological "many openers, no closer" input completes promptly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
from src.agent_loop import _strip_think_blocks
|
||||||
|
|
||||||
|
# The exact pattern this fix replaces. Used only as an equivalence oracle on
|
||||||
|
# well-formed inputs (never on the adversarial one, where it is the slow path).
|
||||||
|
_REFERENCE_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def _reference(text: str) -> str:
|
||||||
|
return _REFERENCE_RE.sub("", text or "")
|
||||||
|
|
||||||
|
|
||||||
|
# Loose ceiling: the linear helper finishes in well under 100ms; the vulnerable
|
||||||
|
# regex took seconds-to-tens-of-seconds on the same input.
|
||||||
|
_BUDGET_S = 4.0
|
||||||
|
|
||||||
|
|
||||||
|
# -- equivalence with the original regex -------------------------------------
|
||||||
|
|
||||||
|
EQUIV_CASES = [
|
||||||
|
"",
|
||||||
|
"no tags here at all",
|
||||||
|
"<think>hidden</think>visible",
|
||||||
|
"before<think>cot</think>after",
|
||||||
|
"a<think>one</think>b<think>two</think>c",
|
||||||
|
"<think>only</think>",
|
||||||
|
"<think></think>tail",
|
||||||
|
"<think>a<think>nested</think>rest", # lazy stops at first closer
|
||||||
|
"leading</think>orphan<think>x</think>", # orphan closer is NOT stripped
|
||||||
|
"trailing<think>no closer for this one", # dangling opener kept verbatim
|
||||||
|
"CASE <THINK>UP</THINK> mix <Think>x</Think>", # case-insensitive
|
||||||
|
"multi\nline\n<think>a\nb\nc</think>\nkeep", # DOTALL across newlines
|
||||||
|
"<thinking>not matched by narrow regex</thinking>", # only literal <think>
|
||||||
|
"<think >space-in-tag not matched</think >", # literal tag only
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_think_blocks_matches_reference_regex():
|
||||||
|
for case in EQUIV_CASES:
|
||||||
|
assert _strip_think_blocks(case) == _reference(case), repr(case)
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_and_none_safe():
|
||||||
|
assert _strip_think_blocks("") == ""
|
||||||
|
assert _strip_think_blocks(None) in (None, "")
|
||||||
|
|
||||||
|
|
||||||
|
# -- ReDoS bound -------------------------------------------------------------
|
||||||
|
|
||||||
|
def test_many_openers_no_closer_is_linear():
|
||||||
|
# Attacker echoes thousands of "<think>" with no closer. The lazy regex
|
||||||
|
# rescans to EOS from each opener (O(n^2)); the helper scans once.
|
||||||
|
hostile = "<think>" * 60_000 + "x"
|
||||||
|
start = time.perf_counter()
|
||||||
|
out = _strip_think_blocks(hostile)
|
||||||
|
elapsed = time.perf_counter() - start
|
||||||
|
# No closer anywhere -> nothing is stripped, input returned intact.
|
||||||
|
assert out == hostile
|
||||||
|
assert elapsed < _BUDGET_S, f"took {elapsed:.2f}s (expected linear)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_openers_then_one_far_closer_is_linear():
|
||||||
|
hostile = "<think>" * 60_000 + "</think>" + "tail"
|
||||||
|
start = time.perf_counter()
|
||||||
|
out = _strip_think_blocks(hostile)
|
||||||
|
elapsed = time.perf_counter() - start
|
||||||
|
# First opener pairs with the single closer; lazy match spans to it.
|
||||||
|
assert out == "tail"
|
||||||
|
assert elapsed < _BUDGET_S, f"took {elapsed:.2f}s (expected linear)"
|
||||||
Reference in New Issue
Block a user