fix(security): prevent ReDoS in XML and args tool-call parsers (#4941)

* fix(security): prevent ReDoS in XML and args tool-call parsers

Four py/polynomial-redos sinks in tool_parsing.py ran lazy/greedy regexes over
untrusted model output (tool-call markup is attacker-influenced via prompt
injection). When the closing delimiter was absent, each rescanned to
end-of-string from every opener -> O(n^2):

  - args => { ... } in _parse_tool_call_block: greedy \{([\s\S]*)\} restarted
    from every `args:{` opener. Now finds the opener once and takes through the
    last `}` (rfind) — equivalent capture, O(n).
  - _XML_INVOKE_RE: lazy <invoke ...>([\s\S]*?)</invoke>. Now _iter_xml_invoke
    pairs each opener with the first reachable </invoke> and stops when none is.
  - _XML_DIRECT_TOOL_RE and the <tag>([\s\S]*?)</\1> param scan in
    _parse_tool_code_block: lazy backreference patterns. Now _iter_backref_blocks
    pairs each opener with the nearest matching closer and memoizes tag names
    with no remaining closer, so an opener flood stays O(n).

All four are output-equivalent to the originals on well-formed tool-call markup;
the lazy patterns remain defined (still re-exported via agent_tools) but no
longer drive a finditer over untrusted text. Adds tests/test_redos_xml_tool_parsers.py
pinning correctness and bounding the opener-flood inputs (old paths took 4-15s).

* fix(security): harden invoke-parameter and distinct-name tag scans

Forward-only the two residual ReDoS paths in the XML/tool parsers that the
outer-delimiter fix left quadratic:

- _parse_xml_invoke parsed <parameter> with _XML_PARAM_RE.finditer, so a
  closed <invoke> body full of unclosed <parameter> openers rescanned the
  body from every opener (O(n^2), ~11s at 8k openers). Now scans forward-only
  via _iter_named_blocks, factored out of _iter_xml_invoke.
- _iter_backref_blocks only memoized repeated missing tag names; a flood of
  distinct unclosed names searched the suffix once per name (O(n^2)). It now
  indexes every closer by name in one linear pass and binary-searches per
  opener (O(n log n)). Covers the direct and tool_code backref scans.

Output-equivalent to the prior scanners (200k randomized trials match the
memoized version for both the direct ci=True and tool_code ci=False configs).
Adds regressions for the closed-invoke parameter flood and the distinct-name
floods (45k openers now run in ~0.05s, were 5-6s).
This commit is contained in:
nopoz
2026-06-27 15:42:55 -07:00
committed by GitHub
parent df9907c09f
commit fbe3a0d73b
2 changed files with 315 additions and 28 deletions
+118 -28
View File
@@ -6,6 +6,7 @@ Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style <invoke> blocks.
"""
import ast
import bisect
import json
import logging
import re
@@ -70,6 +71,27 @@ _XML_DIRECT_TOOL_RE = re.compile(
r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
re.IGNORECASE,
)
# Forward-only delimiters for the lazy XML patterns above, so untrusted "many
# openers, no closer" model output can't drive finditer's O(n^2) lazy rescan
# (CodeQL py/polynomial-redos). Consumed by _iter_xml_invoke / _iter_xml_direct.
_XML_INVOKE_OPEN_RE = re.compile(r'<invoke\s+name=["\'](\w+)["\']>\s*', re.IGNORECASE)
_XML_INVOKE_CLOSE_RE = re.compile(r'</invoke>', re.IGNORECASE)
_XML_DIRECT_OPEN_RE = re.compile(r"<\s*([A-Za-z_][\w-]*)\s*>", re.IGNORECASE)
# Split <parameter ...>...</parameter> delimiters: the parameter scan inside an
# invoke body is forward-only too, so a closed invoke stuffed with unclosed
# parameter openers can't drive finditer's O(n^2) rescan. See _iter_named_blocks.
_XML_PARAM_OPEN_RE = re.compile(r'<parameter\s+name=["\'](\w+)["\']>', re.IGNORECASE)
_XML_PARAM_CLOSE_RE = re.compile(r'</parameter>', re.IGNORECASE)
# Closer tokens (any tag name) for the backref scanners, pre-indexed by name so a
# flood of distinct unclosed tag names stays near-linear. See _iter_backref_blocks.
_XML_DIRECT_CLOSE_ANY_RE = re.compile(r"</\s*([A-Za-z_][\w-]*)\s*>", re.IGNORECASE)
# `args => { ... }` opener (its closer is the last `}`, found with rfind) and the
# `<tag>` opener for tool_code XML params — both split out of greedy/backref
# patterns that finditer would otherwise rescan from every opener. See
# _parse_tool_call_block / _parse_tool_code_block.
_ARGS_BRACE_OPEN_RE = re.compile(r'args\s*(?:=>|:|=)\s*\{')
_TOOL_CODE_PARAM_OPEN_RE = re.compile(r"<(\w+)>")
_TOOL_CODE_PARAM_CLOSE_ANY_RE = re.compile(r"</(\w+)>")
# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
# <tool▁calls▁begin> ... <tool▁calls▁end>
@@ -507,11 +529,15 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]:
if cmd_match:
content = cmd_match.group(1)
# Pattern: args => {content} — extract everything inside the nested braces
# Pattern: args => {content} — extract everything inside the nested braces.
# Find the opener, then take through the LAST `}` (rfind). Equivalent to the
# greedy `\{([\s\S]*)\}` capture, but the bounded opener + rfind avoids
# finditer rescanning from every `args:{` opener (CodeQL py/polynomial-redos).
if not content:
args_match = re.search(r'args\s*(?:=>|:|=)\s*\{([\s\S]*)\}', raw, re.DOTALL)
if args_match:
inner = args_match.group(1).strip()
am = _ARGS_BRACE_OPEN_RE.search(raw)
close = raw.rfind('}')
if am and close >= am.end():
inner = raw[am.end():close].strip()
# Strip quotes and key prefixes
inner = re.sub(r'^--?\w+\s+', '', inner)
inner = inner.strip('\'"')
@@ -539,8 +565,8 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]:
return None
def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
"""Parse an <invoke name="tool"><parameter ...>...</parameter></invoke> match.
def _parse_xml_invoke(name, body) -> Optional[ToolBlock]:
"""Parse an <invoke name="tool"><parameter ...>...</parameter></invoke> call.
Delegates content-shaping to function_call_to_tool_block — the SAME
converter used for native function calls — so the full tool set (every
@@ -555,17 +581,16 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
# (e.g. <invoke name="Bash">) and function_call_to_tool_block matches
# case-sensitively against the lowercase _TOOL_NAME_MAP / TOOL_TAGS, so a
# raw capitalized name would be silently dropped.
tool_name = inv_match.group(1).lower()
body = inv_match.group(2)
tool_name = name.lower()
params = {}
for pm in _XML_PARAM_RE.finditer(body):
params[pm.group(1)] = pm.group(2).strip()
for pname, pval in _iter_named_blocks(body, _XML_PARAM_OPEN_RE, _XML_PARAM_CLOSE_RE):
params[pname] = pval.strip()
# Local import to avoid a circular import at module load.
from src.tool_schemas import function_call_to_tool_block
return function_call_to_tool_block(tool_name, json.dumps(params))
def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
def _parse_xml_direct_tool(name, body) -> Optional[ToolBlock]:
"""Parse direct XML tool tags inside <tool_call>.
Some local models emit:
@@ -575,13 +600,13 @@ def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
Keep this as an adapter to the canonical function-call converter so aliases
and per-tool argument formatting stay in one place.
"""
tool_name = tool_match.group(1).lower().replace("-", "_")
tool_name = name.lower().replace("-", "_")
if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
return None
mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
if not mapped:
return None
body = tool_match.group(2).strip()
body = body.strip()
if not body:
return None
try:
@@ -716,10 +741,12 @@ def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
args_match = re.search(r"args\s*=>\s*['\"]?\s*([\s\S]*?)\s*['\"]?\s*$", raw, re.DOTALL)
args_body = args_match.group(1).strip().strip("'\"") if args_match else ""
# Parse XML params inside args (e.g. <command>ls</command>)
# Parse XML params inside args (e.g. <command>ls</command>). Forward-only
# backref scan so a `<x><x>...` opener flood can't drive the O(n^2) lazy
# rescan (CodeQL py/polynomial-redos); see _iter_backref_blocks.
xml_params = {}
for pm in re.finditer(r"<(\w+)>([\s\S]*?)</\1>", args_body):
xml_params[pm.group(1)] = pm.group(2).strip()
for pname, pval in _iter_backref_blocks(args_body, _TOOL_CODE_PARAM_OPEN_RE, _TOOL_CODE_PARAM_CLOSE_ANY_RE):
xml_params[pname] = pval.strip()
# When the model gave structured params, hand them to the canonical
# converter (same as native calls + <invoke>) so the full tool set and
@@ -800,6 +827,69 @@ def _strip_delimited(text: str, open_re, close_re) -> str:
return "".join(out)
def _iter_named_blocks(text, open_re, close_re):
"""Forward-only equivalent of ``open_re([\\s\\S]*?)close_re`` finditer where
open_re captures a name in group 1: yield ``(name, body)``, pairing each
opener with the first ``close_re`` after it. O(n) once no closer is reachable
from an opener, no later opener has one either (see _iter_delimited), so
untrusted opener floods can't drive the lazy O(n^2) rescan."""
pos = 0
while True:
om = open_re.search(text, pos)
if om is None:
return
cm = close_re.search(text, om.end())
if cm is None:
return
yield om.group(1), text[om.end():cm.start()]
pos = cm.end()
def _iter_xml_invoke(text):
"""Forward-only ``<invoke name="..">...</invoke>`` scan (see _iter_named_blocks)."""
return _iter_named_blocks(text, _XML_INVOKE_OPEN_RE, _XML_INVOKE_CLOSE_RE)
def _iter_backref_blocks(text, open_re, close_any_re, ci=False):
"""Forward-only equivalent of an ``<tag>([\\s\\S]*?)</tag>`` backreference
finditer (same-name open/close): yield ``(name, body)``, pairing each opener
with the nearest following matching closer and skipping an opener whose
closer is unreachable.
Every closer is indexed by tag name in one linear pass, then each opener
binary-searches its own name's closer positions. A flood of distinct unclosed
tag names therefore stays O(n log n) rather than the lazy backref's O(n^2)
suffix rescan (CodeQL py/polynomial-redos); per-name memoization alone left
that distinct-name case quadratic. ``close_any_re`` matches ANY closer and
captures its tag name in group 1; ``ci`` lowercases names for matching, since
the original backref closer is case-insensitive under re.IGNORECASE."""
norm = (lambda s: s.lower()) if ci else (lambda s: s)
closer_starts = {}
closer_ends = {}
for cm in close_any_re.finditer(text):
k = norm(cm.group(1))
closer_starts.setdefault(k, []).append(cm.start())
closer_ends.setdefault(k, []).append(cm.end())
om = open_re.search(text)
while om is not None:
name = om.group(1)
k = norm(name)
resume = om.end()
starts = closer_starts.get(k)
if starts:
i = bisect.bisect_left(starts, om.end())
if i < len(starts):
yield name, text[om.end():starts[i]]
resume = closer_ends[k][i]
om = open_re.search(text, resume)
def _iter_xml_direct(text):
"""Forward-only equivalent of ``_XML_DIRECT_TOOL_RE.finditer`` (see
_iter_backref_blocks)."""
return _iter_backref_blocks(text, _XML_DIRECT_OPEN_RE, _XML_DIRECT_CLOSE_ANY_RE, ci=True)
def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
"""Extract executable tool blocks from LLM response text.
@@ -840,8 +930,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
# If a code block's content is an <invoke> XML call (some models wrap
# tool calls in ```python or ```xml fences), parse the invoke instead.
if '<invoke' in content:
for inv in _XML_INVOKE_RE.finditer(content):
block = _parse_xml_invoke(inv)
for inv_name, inv_body in _iter_xml_invoke(content):
block = _parse_xml_invoke(inv_name, inv_body)
if block:
blocks.append(block)
# This fenced block is <invoke> markup, not literal code. Whether or
@@ -882,13 +972,13 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
text, _XML_TOOL_CALL_OPEN_RE, _XML_TOOL_CALL_CLOSE_RE
):
body = text[inner_start:inner_end]
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
for inv_name, inv_body in _iter_xml_invoke(body):
block = _parse_xml_invoke(inv_name, inv_body)
if block:
blocks.append(block)
if not blocks:
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
for d_name, d_body in _iter_xml_direct(body):
block = _parse_xml_direct_tool(d_name, d_body)
if block:
blocks.append(block)
# Some local models stream an opening <tool_call> wrapper and a
@@ -896,20 +986,20 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
if not blocks:
for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
body = m.group(1)
for inv in _XML_INVOKE_RE.finditer(body):
block = _parse_xml_invoke(inv)
for inv_name, inv_body in _iter_xml_invoke(body):
block = _parse_xml_invoke(inv_name, inv_body)
if block:
blocks.append(block)
if blocks:
break
for direct in _XML_DIRECT_TOOL_RE.finditer(body):
block = _parse_xml_direct_tool(direct)
for d_name, d_body in _iter_xml_direct(body):
block = _parse_xml_direct_tool(d_name, d_body)
if block:
blocks.append(block)
# Try bare <invoke> without wrapper
if not blocks:
for inv in _XML_INVOKE_RE.finditer(text):
block = _parse_xml_invoke(inv)
for inv_name, inv_body in _iter_xml_invoke(text):
block = _parse_xml_invoke(inv_name, inv_body)
if block:
blocks.append(block)
+197
View File
@@ -0,0 +1,197 @@
"""Regression tests for the remaining ReDoS sinks in tool_parsing.py.
A previous fix (test_redos_llm_parsers.py) hardened the delimiter-bounded
[TOOL_CALL]/<tool_call>/<tool_code> scanners but explicitly left four patterns
that CodeQL (py/polynomial-redos) flagged on the next rescan:
* `args => { ... }` in `_parse_tool_call_block` — greedy `\\{([\\s\\S]*)\\}`
that `re.search` restarts from every `args:{` opener -> O(n^2).
* `_XML_INVOKE_RE` — lazy `<invoke ...>([\\s\\S]*?)</invoke>` that rescans to
end-of-string from every opener when no `</invoke>` follows.
* `_XML_DIRECT_TOOL_RE` and the `<tag>([\\s\\S]*?)</\\1>` param scan in
`_parse_tool_code_block` — lazy *backreference* patterns with the same
opener-flood blowup.
These run over untrusted model output (tool-call markup is attacker-influenced
via prompt injection), so each is now a forward-only scan. The tests pin:
* correctness is unchanged for legitimate tool-call markup, and
* pathological "many openers, no closer" inputs complete promptly.
The timing bound is loose (seconds) so it never flakes on a slow CI box; the
unguarded patterns took 2-15s on these inputs, so the margin is ~100x.
"""
import time
import pytest
import src.agent_tools # noqa: F401 (break agent_tools<->tool_parsing import cycle)
from src.tool_parsing import (
parse_tool_blocks,
strip_tool_blocks,
_parse_tool_call_block,
_parse_tool_code_block,
)
_BUDGET_S = 4.0
def _timed(fn, *args):
start = time.perf_counter()
result = fn(*args)
return result, time.perf_counter() - start
# ── correctness is preserved ────────────────────────────────────────────────
def test_xml_invoke_call_still_parsed():
blocks = parse_tool_blocks(
'<tool_call><invoke name="bash"><parameter name="command">ls -la</parameter></invoke></tool_call>'
)
assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")]
def test_xml_direct_tool_still_parsed():
blocks = parse_tool_blocks('<tool_call><web_search>weather today</web_search></tool_call>')
assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "weather today")]
def test_xml_direct_tool_backref_is_case_insensitive():
# `</\\1>` matched case-insensitively under re.IGNORECASE; the forward-only
# scanner preserves that (mixed-case closer still pairs with its opener).
blocks = parse_tool_blocks('<tool_call><Web_Search>q</WEB_SEARCH></tool_call>')
assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "q")]
def test_tool_code_xml_params_still_parsed():
blocks = parse_tool_blocks("<tool_code>{tool => 'bash', args => '<command>ls -la</command>'}</tool_code>")
assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")]
def test_xml_invoke_multiple_parameters_still_parsed():
# The invoke parameter scan is forward-only; a well-formed invoke with more
# than one <parameter> must still yield every name/value pair.
blocks = parse_tool_blocks(
'<tool_call><invoke name="web_search">'
'<parameter name="query">rust traits</parameter>'
'<parameter name="time_filter">week</parameter>'
'</invoke></tool_call>'
)
assert len(blocks) == 1
assert blocks[0].tool_type == "web_search"
assert '"query": "rust traits"' in blocks[0].content
assert '"time_filter": "week"' in blocks[0].content
def test_xml_direct_distinct_tag_names_still_parsed():
# Distinct sibling tags inside <tool_call> each pair with their own closer;
# the forward-only direct scan must keep matching after the first block.
blocks = parse_tool_blocks(
'<tool_call><web_search>weather</web_search><read_file>notes.txt</read_file></tool_call>'
)
assert [(b.tool_type, b.content) for b in blocks] == [
("web_search", "weather"),
("read_file", "notes.txt"),
]
def test_tool_call_args_brace_still_parsed():
blocks = parse_tool_blocks('[TOOL_CALL]{tool => "shell", args => {--command "ls"}}[/TOOL_CALL]')
assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls")]
def test_args_brace_takes_through_last_close_brace():
# `\\{([\\s\\S]*)\\}` is greedy to the LAST `}`; the rfind-based rewrite must
# match that (keep the nested object intact, not stop at the first `}`).
block = _parse_tool_call_block('tool => "bash", args => {--command "echo {x} done"}')
assert block is not None and block.tool_type == "bash"
assert block.content == "echo {x} done"
def test_fenced_invoke_still_parsed():
blocks = parse_tool_blocks(
'```python\n<invoke name="bash"><parameter name="command">whoami</parameter></invoke>\n```'
)
assert [(b.tool_type, b.content) for b in blocks] == [("bash", "whoami")]
# ── pathological inputs no longer blow up ───────────────────────────────────
def test_args_brace_opener_flood_is_fast():
# Many `args:{` openers, no closing `}` — old greedy capture restarted from
# every opener (>10s); the bounded opener + rfind is O(n).
evil = "args:{{a" * 14000
block, dt = _timed(_parse_tool_call_block, evil)
assert dt < _BUDGET_S, f"_parse_tool_call_block took {dt:.2f}s"
assert block is None
# And through the public path, wrapped in a [TOOL_CALL] block.
_, dt2 = _timed(parse_tool_blocks, "[TOOL_CALL]{" + evil + "}[/TOOL_CALL]")
assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"
def test_xml_invoke_opener_flood_is_fast():
# Bare <invoke> opener flood, no </invoke> closer.
evil = ('<invoke name="x">' + "a" * 10) * 6000
blocks, dt = _timed(parse_tool_blocks, evil)
assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
assert blocks == []
def test_xml_invoke_stale_closer_before_opener_flood_is_fast():
# A lone leading </invoke> satisfies a substring guard, but no opener after
# it has a reachable closer.
evil = "</invoke>" + ('<invoke name="x">' + "a" * 10) * 6000
_, dt = _timed(parse_tool_blocks, evil)
assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
def test_xml_direct_backref_opener_flood_is_fast():
# <tool_call> wrapper (no </tool_call>) routes into the open-wrapper path,
# which reaches the _XML_DIRECT_TOOL_RE backreference scan: a `<a><a>...`
# flood with no `</a>` closer.
evil = "<tool_call>" + "<a><a>b" * 6000
blocks, dt = _timed(parse_tool_blocks, evil)
assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
assert blocks == []
def test_tool_code_param_backref_flood_is_fast():
# `<x><x>...` param flood inside tool_code args, no `</x>` closer — exercises
# the `<tag>([\\s\\S]*?)</\\1>` backreference scan in _parse_tool_code_block.
args_flood = "tool => 'bash', args => " + "<x><x>a" * 6000
block, dt = _timed(_parse_tool_code_block, args_flood)
assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"
# Through the public path, inside a closed <tool_code> block.
_, dt2 = _timed(parse_tool_blocks, "<tool_code>{" + args_flood + "}</tool_code>")
assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"
def test_xml_invoke_closed_with_parameter_opener_flood_is_fast():
# A CLOSED <invoke> whose body is a flood of `<parameter name=..>` openers
# with no `</parameter>` closer: the invoke delimiter pairs fine, but the
# inner parameter scan must not rescan the body from every opener (O(n^2)).
evil = ('<tool_call><invoke name="bash">'
+ '<parameter name="x">' * 6000
+ '</invoke></tool_call>')
blocks, dt = _timed(parse_tool_blocks, evil)
assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
# No `</parameter>` ever closes, so no params are captured.
assert len(blocks) == 1 and blocks[0].tool_type == "bash"
def test_xml_direct_distinct_name_opener_flood_is_fast():
# Distinct unclosed tag names (`<t0><t1>...`) defeat per-name memoization;
# the scan must still stay near-linear instead of searching the suffix once
# per new name.
evil = "<tool_call>" + "".join(f"<t{i}>" for i in range(45000))
blocks, dt = _timed(parse_tool_blocks, evil)
assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
assert blocks == []
def test_tool_code_param_distinct_name_flood_is_fast():
# Same distinct-name flood inside tool_code args, reaching the param backref
# scan in _parse_tool_code_block.
args_flood = "tool => 'bash', args => " + "".join(f"<t{i}>" for i in range(45000))
_, dt = _timed(_parse_tool_code_block, args_flood)
assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"