From fbe3a0d73b5b3e66395104ed27e595d8218f3028 Mon Sep 17 00:00:00 2001 From: nopoz Date: Sat, 27 Jun 2026 15:42:55 -0700 Subject: [PATCH] fix(security): prevent ReDoS in XML and args tool-call parsers (#4941) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(security): prevent ReDoS in XML and args tool-call parsers Four py/polynomial-redos sinks in tool_parsing.py ran lazy/greedy regexes over untrusted model output (tool-call markup is attacker-influenced via prompt injection). When the closing delimiter was absent, each rescanned to end-of-string from every opener -> O(n^2): - args => { ... } in _parse_tool_call_block: greedy \{([\s\S]*)\} restarted from every `args:{` opener. Now finds the opener once and takes through the last `}` (rfind) — equivalent capture, O(n). - _XML_INVOKE_RE: lazy ([\s\S]*?). Now _iter_xml_invoke pairs each opener with the first reachable and stops when none is. - _XML_DIRECT_TOOL_RE and the ([\s\S]*?) param scan in _parse_tool_code_block: lazy backreference patterns. Now _iter_backref_blocks pairs each opener with the nearest matching closer and memoizes tag names with no remaining closer, so an opener flood stays O(n). All four are output-equivalent to the originals on well-formed tool-call markup; the lazy patterns remain defined (still re-exported via agent_tools) but no longer drive a finditer over untrusted text. Adds tests/test_redos_xml_tool_parsers.py pinning correctness and bounding the opener-flood inputs (old paths took 4-15s). * fix(security): harden invoke-parameter and distinct-name tag scans Forward-only the two residual ReDoS paths in the XML/tool parsers that the outer-delimiter fix left quadratic: - _parse_xml_invoke parsed with _XML_PARAM_RE.finditer, so a closed body full of unclosed openers rescanned the body from every opener (O(n^2), ~11s at 8k openers). Now scans forward-only via _iter_named_blocks, factored out of _iter_xml_invoke. - _iter_backref_blocks only memoized repeated missing tag names; a flood of distinct unclosed names searched the suffix once per name (O(n^2)). It now indexes every closer by name in one linear pass and binary-searches per opener (O(n log n)). Covers the direct and tool_code backref scans. Output-equivalent to the prior scanners (200k randomized trials match the memoized version for both the direct ci=True and tool_code ci=False configs). Adds regressions for the closed-invoke parameter flood and the distinct-name floods (45k openers now run in ~0.05s, were 5-6s). --- src/tool_parsing.py | 146 ++++++++++++++++---- tests/test_redos_xml_tool_parsers.py | 197 +++++++++++++++++++++++++++ 2 files changed, 315 insertions(+), 28 deletions(-) create mode 100644 tests/test_redos_xml_tool_parsers.py diff --git a/src/tool_parsing.py b/src/tool_parsing.py index fefd4efea..218b53105 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -6,6 +6,7 @@ Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style blocks. """ import ast +import bisect import json import logging import re @@ -70,6 +71,27 @@ _XML_DIRECT_TOOL_RE = re.compile( r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)", re.IGNORECASE, ) +# Forward-only delimiters for the lazy XML patterns above, so untrusted "many +# openers, no closer" model output can't drive finditer's O(n^2) lazy rescan +# (CodeQL py/polynomial-redos). Consumed by _iter_xml_invoke / _iter_xml_direct. +_XML_INVOKE_OPEN_RE = re.compile(r'\s*', re.IGNORECASE) +_XML_INVOKE_CLOSE_RE = re.compile(r'', re.IGNORECASE) +_XML_DIRECT_OPEN_RE = re.compile(r"<\s*([A-Za-z_][\w-]*)\s*>", re.IGNORECASE) +# Split ... delimiters: the parameter scan inside an +# invoke body is forward-only too, so a closed invoke stuffed with unclosed +# parameter openers can't drive finditer's O(n^2) rescan. See _iter_named_blocks. +_XML_PARAM_OPEN_RE = re.compile(r'', re.IGNORECASE) +_XML_PARAM_CLOSE_RE = re.compile(r'', re.IGNORECASE) +# Closer tokens (any tag name) for the backref scanners, pre-indexed by name so a +# flood of distinct unclosed tag names stays near-linear. See _iter_backref_blocks. +_XML_DIRECT_CLOSE_ANY_RE = re.compile(r"", re.IGNORECASE) +# `args => { ... }` opener (its closer is the last `}`, found with rfind) and the +# `` opener for tool_code XML params — both split out of greedy/backref +# patterns that finditer would otherwise rescan from every opener. See +# _parse_tool_call_block / _parse_tool_code_block. +_ARGS_BRACE_OPEN_RE = re.compile(r'args\s*(?:=>|:|=)\s*\{') +_TOOL_CODE_PARAM_OPEN_RE = re.compile(r"<(\w+)>") +_TOOL_CODE_PARAM_CLOSE_ANY_RE = re.compile(r"") # Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines: # <|tool▁calls▁begin|> ... <|tool▁calls▁end|> @@ -507,11 +529,15 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]: if cmd_match: content = cmd_match.group(1) - # Pattern: args => {content} — extract everything inside the nested braces + # Pattern: args => {content} — extract everything inside the nested braces. + # Find the opener, then take through the LAST `}` (rfind). Equivalent to the + # greedy `\{([\s\S]*)\}` capture, but the bounded opener + rfind avoids + # finditer rescanning from every `args:{` opener (CodeQL py/polynomial-redos). if not content: - args_match = re.search(r'args\s*(?:=>|:|=)\s*\{([\s\S]*)\}', raw, re.DOTALL) - if args_match: - inner = args_match.group(1).strip() + am = _ARGS_BRACE_OPEN_RE.search(raw) + close = raw.rfind('}') + if am and close >= am.end(): + inner = raw[am.end():close].strip() # Strip quotes and key prefixes inner = re.sub(r'^--?\w+\s+', '', inner) inner = inner.strip('\'"') @@ -539,8 +565,8 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]: return None -def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]: - """Parse an ... match. +def _parse_xml_invoke(name, body) -> Optional[ToolBlock]: + """Parse an ... call. Delegates content-shaping to function_call_to_tool_block — the SAME converter used for native function calls — so the full tool set (every @@ -555,17 +581,16 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]: # (e.g. ) and function_call_to_tool_block matches # case-sensitively against the lowercase _TOOL_NAME_MAP / TOOL_TAGS, so a # raw capitalized name would be silently dropped. - tool_name = inv_match.group(1).lower() - body = inv_match.group(2) + tool_name = name.lower() params = {} - for pm in _XML_PARAM_RE.finditer(body): - params[pm.group(1)] = pm.group(2).strip() + for pname, pval in _iter_named_blocks(body, _XML_PARAM_OPEN_RE, _XML_PARAM_CLOSE_RE): + params[pname] = pval.strip() # Local import to avoid a circular import at module load. from src.tool_schemas import function_call_to_tool_block return function_call_to_tool_block(tool_name, json.dumps(params)) -def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]: +def _parse_xml_direct_tool(name, body) -> Optional[ToolBlock]: """Parse direct XML tool tags inside . Some local models emit: @@ -575,13 +600,13 @@ def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]: Keep this as an adapter to the canonical function-call converter so aliases and per-tool argument formatting stay in one place. """ - tool_name = tool_match.group(1).lower().replace("-", "_") + tool_name = name.lower().replace("-", "_") if tool_name in {"invoke", "parameter", "tool_call", "function_call"}: return None mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None) if not mapped: return None - body = tool_match.group(2).strip() + body = body.strip() if not body: return None try: @@ -716,10 +741,12 @@ def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]: args_match = re.search(r"args\s*=>\s*['\"]?\s*([\s\S]*?)\s*['\"]?\s*$", raw, re.DOTALL) args_body = args_match.group(1).strip().strip("'\"") if args_match else "" - # Parse XML params inside args (e.g. ls) + # Parse XML params inside args (e.g. ls). Forward-only + # backref scan so a `...` opener flood can't drive the O(n^2) lazy + # rescan (CodeQL py/polynomial-redos); see _iter_backref_blocks. xml_params = {} - for pm in re.finditer(r"<(\w+)>([\s\S]*?)", args_body): - xml_params[pm.group(1)] = pm.group(2).strip() + for pname, pval in _iter_backref_blocks(args_body, _TOOL_CODE_PARAM_OPEN_RE, _TOOL_CODE_PARAM_CLOSE_ANY_RE): + xml_params[pname] = pval.strip() # When the model gave structured params, hand them to the canonical # converter (same as native calls + ) so the full tool set and @@ -800,6 +827,69 @@ def _strip_delimited(text: str, open_re, close_re) -> str: return "".join(out) +def _iter_named_blocks(text, open_re, close_re): + """Forward-only equivalent of ``open_re([\\s\\S]*?)close_re`` finditer where + open_re captures a name in group 1: yield ``(name, body)``, pairing each + opener with the first ``close_re`` after it. O(n) once no closer is reachable + from an opener, no later opener has one either (see _iter_delimited), so + untrusted opener floods can't drive the lazy O(n^2) rescan.""" + pos = 0 + while True: + om = open_re.search(text, pos) + if om is None: + return + cm = close_re.search(text, om.end()) + if cm is None: + return + yield om.group(1), text[om.end():cm.start()] + pos = cm.end() + + +def _iter_xml_invoke(text): + """Forward-only ``...`` scan (see _iter_named_blocks).""" + return _iter_named_blocks(text, _XML_INVOKE_OPEN_RE, _XML_INVOKE_CLOSE_RE) + + +def _iter_backref_blocks(text, open_re, close_any_re, ci=False): + """Forward-only equivalent of an ``([\\s\\S]*?)`` backreference + finditer (same-name open/close): yield ``(name, body)``, pairing each opener + with the nearest following matching closer and skipping an opener whose + closer is unreachable. + + Every closer is indexed by tag name in one linear pass, then each opener + binary-searches its own name's closer positions. A flood of distinct unclosed + tag names therefore stays O(n log n) rather than the lazy backref's O(n^2) + suffix rescan (CodeQL py/polynomial-redos); per-name memoization alone left + that distinct-name case quadratic. ``close_any_re`` matches ANY closer and + captures its tag name in group 1; ``ci`` lowercases names for matching, since + the original backref closer is case-insensitive under re.IGNORECASE.""" + norm = (lambda s: s.lower()) if ci else (lambda s: s) + closer_starts = {} + closer_ends = {} + for cm in close_any_re.finditer(text): + k = norm(cm.group(1)) + closer_starts.setdefault(k, []).append(cm.start()) + closer_ends.setdefault(k, []).append(cm.end()) + om = open_re.search(text) + while om is not None: + name = om.group(1) + k = norm(name) + resume = om.end() + starts = closer_starts.get(k) + if starts: + i = bisect.bisect_left(starts, om.end()) + if i < len(starts): + yield name, text[om.end():starts[i]] + resume = closer_ends[k][i] + om = open_re.search(text, resume) + + +def _iter_xml_direct(text): + """Forward-only equivalent of ``_XML_DIRECT_TOOL_RE.finditer`` (see + _iter_backref_blocks).""" + return _iter_backref_blocks(text, _XML_DIRECT_OPEN_RE, _XML_DIRECT_CLOSE_ANY_RE, ci=True) + + def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: """Extract executable tool blocks from LLM response text. @@ -840,8 +930,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: # If a code block's content is an XML call (some models wrap # tool calls in ```python or ```xml fences), parse the invoke instead. if ' markup, not literal code. Whether or @@ -882,13 +972,13 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: text, _XML_TOOL_CALL_OPEN_RE, _XML_TOOL_CALL_CLOSE_RE ): body = text[inner_start:inner_end] - for inv in _XML_INVOKE_RE.finditer(body): - block = _parse_xml_invoke(inv) + for inv_name, inv_body in _iter_xml_invoke(body): + block = _parse_xml_invoke(inv_name, inv_body) if block: blocks.append(block) if not blocks: - for direct in _XML_DIRECT_TOOL_RE.finditer(body): - block = _parse_xml_direct_tool(direct) + for d_name, d_body in _iter_xml_direct(body): + block = _parse_xml_direct_tool(d_name, d_body) if block: blocks.append(block) # Some local models stream an opening wrapper and a @@ -896,20 +986,20 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: if not blocks: for m in _XML_OPEN_TOOL_CALL_RE.finditer(text): body = m.group(1) - for inv in _XML_INVOKE_RE.finditer(body): - block = _parse_xml_invoke(inv) + for inv_name, inv_body in _iter_xml_invoke(body): + block = _parse_xml_invoke(inv_name, inv_body) if block: blocks.append(block) if blocks: break - for direct in _XML_DIRECT_TOOL_RE.finditer(body): - block = _parse_xml_direct_tool(direct) + for d_name, d_body in _iter_xml_direct(body): + block = _parse_xml_direct_tool(d_name, d_body) if block: blocks.append(block) # Try bare without wrapper if not blocks: - for inv in _XML_INVOKE_RE.finditer(text): - block = _parse_xml_invoke(inv) + for inv_name, inv_body in _iter_xml_invoke(text): + block = _parse_xml_invoke(inv_name, inv_body) if block: blocks.append(block) diff --git a/tests/test_redos_xml_tool_parsers.py b/tests/test_redos_xml_tool_parsers.py new file mode 100644 index 000000000..c2602b4ef --- /dev/null +++ b/tests/test_redos_xml_tool_parsers.py @@ -0,0 +1,197 @@ +"""Regression tests for the remaining ReDoS sinks in tool_parsing.py. + +A previous fix (test_redos_llm_parsers.py) hardened the delimiter-bounded +[TOOL_CALL]// scanners but explicitly left four patterns +that CodeQL (py/polynomial-redos) flagged on the next rescan: + + * `args => { ... }` in `_parse_tool_call_block` — greedy `\\{([\\s\\S]*)\\}` + that `re.search` restarts from every `args:{` opener -> O(n^2). + * `_XML_INVOKE_RE` — lazy `([\\s\\S]*?)` that rescans to + end-of-string from every opener when no `` follows. + * `_XML_DIRECT_TOOL_RE` and the `([\\s\\S]*?)` param scan in + `_parse_tool_code_block` — lazy *backreference* patterns with the same + opener-flood blowup. + +These run over untrusted model output (tool-call markup is attacker-influenced +via prompt injection), so each is now a forward-only scan. The tests pin: + * correctness is unchanged for legitimate tool-call markup, and + * pathological "many openers, no closer" inputs complete promptly. + +The timing bound is loose (seconds) so it never flakes on a slow CI box; the +unguarded patterns took 2-15s on these inputs, so the margin is ~100x. +""" + +import time + +import pytest + +import src.agent_tools # noqa: F401 (break agent_tools<->tool_parsing import cycle) +from src.tool_parsing import ( + parse_tool_blocks, + strip_tool_blocks, + _parse_tool_call_block, + _parse_tool_code_block, +) + +_BUDGET_S = 4.0 + + +def _timed(fn, *args): + start = time.perf_counter() + result = fn(*args) + return result, time.perf_counter() - start + + +# ── correctness is preserved ──────────────────────────────────────────────── + +def test_xml_invoke_call_still_parsed(): + blocks = parse_tool_blocks( + 'ls -la' + ) + assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")] + + +def test_xml_direct_tool_still_parsed(): + blocks = parse_tool_blocks('weather today') + assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "weather today")] + + +def test_xml_direct_tool_backref_is_case_insensitive(): + # `` matched case-insensitively under re.IGNORECASE; the forward-only + # scanner preserves that (mixed-case closer still pairs with its opener). + blocks = parse_tool_blocks('q') + assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "q")] + + +def test_tool_code_xml_params_still_parsed(): + blocks = parse_tool_blocks("{tool => 'bash', args => 'ls -la'}") + assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")] + + +def test_xml_invoke_multiple_parameters_still_parsed(): + # The invoke parameter scan is forward-only; a well-formed invoke with more + # than one must still yield every name/value pair. + blocks = parse_tool_blocks( + '' + 'rust traits' + 'week' + '' + ) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert '"query": "rust traits"' in blocks[0].content + assert '"time_filter": "week"' in blocks[0].content + + +def test_xml_direct_distinct_tag_names_still_parsed(): + # Distinct sibling tags inside each pair with their own closer; + # the forward-only direct scan must keep matching after the first block. + blocks = parse_tool_blocks( + 'weathernotes.txt' + ) + assert [(b.tool_type, b.content) for b in blocks] == [ + ("web_search", "weather"), + ("read_file", "notes.txt"), + ] + + +def test_tool_call_args_brace_still_parsed(): + blocks = parse_tool_blocks('[TOOL_CALL]{tool => "shell", args => {--command "ls"}}[/TOOL_CALL]') + assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls")] + + +def test_args_brace_takes_through_last_close_brace(): + # `\\{([\\s\\S]*)\\}` is greedy to the LAST `}`; the rfind-based rewrite must + # match that (keep the nested object intact, not stop at the first `}`). + block = _parse_tool_call_block('tool => "bash", args => {--command "echo {x} done"}') + assert block is not None and block.tool_type == "bash" + assert block.content == "echo {x} done" + + +def test_fenced_invoke_still_parsed(): + blocks = parse_tool_blocks( + '```python\nwhoami\n```' + ) + assert [(b.tool_type, b.content) for b in blocks] == [("bash", "whoami")] + + +# ── pathological inputs no longer blow up ─────────────────────────────────── + +def test_args_brace_opener_flood_is_fast(): + # Many `args:{` openers, no closing `}` — old greedy capture restarted from + # every opener (>10s); the bounded opener + rfind is O(n). + evil = "args:{{a" * 14000 + block, dt = _timed(_parse_tool_call_block, evil) + assert dt < _BUDGET_S, f"_parse_tool_call_block took {dt:.2f}s" + assert block is None + # And through the public path, wrapped in a [TOOL_CALL] block. + _, dt2 = _timed(parse_tool_blocks, "[TOOL_CALL]{" + evil + "}[/TOOL_CALL]") + assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s" + + +def test_xml_invoke_opener_flood_is_fast(): + # Bare opener flood, no closer. + evil = ('' + "a" * 10) * 6000 + blocks, dt = _timed(parse_tool_blocks, evil) + assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s" + assert blocks == [] + + +def test_xml_invoke_stale_closer_before_opener_flood_is_fast(): + # A lone leading satisfies a substring guard, but no opener after + # it has a reachable closer. + evil = "" + ('' + "a" * 10) * 6000 + _, dt = _timed(parse_tool_blocks, evil) + assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s" + + +def test_xml_direct_backref_opener_flood_is_fast(): + # wrapper (no ) routes into the open-wrapper path, + # which reaches the _XML_DIRECT_TOOL_RE backreference scan: a `...` + # flood with no `` closer. + evil = "" + "b" * 6000 + blocks, dt = _timed(parse_tool_blocks, evil) + assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s" + assert blocks == [] + + +def test_tool_code_param_backref_flood_is_fast(): + # `...` param flood inside tool_code args, no `` closer — exercises + # the `([\\s\\S]*?)` backreference scan in _parse_tool_code_block. + args_flood = "tool => 'bash', args => " + "a" * 6000 + block, dt = _timed(_parse_tool_code_block, args_flood) + assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s" + # Through the public path, inside a closed block. + _, dt2 = _timed(parse_tool_blocks, "{" + args_flood + "}") + assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s" + + +def test_xml_invoke_closed_with_parameter_opener_flood_is_fast(): + # A CLOSED whose body is a flood of `` openers + # with no `` closer: the invoke delimiter pairs fine, but the + # inner parameter scan must not rescan the body from every opener (O(n^2)). + evil = ('' + + '' * 6000 + + '') + blocks, dt = _timed(parse_tool_blocks, evil) + assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s" + # No `` ever closes, so no params are captured. + assert len(blocks) == 1 and blocks[0].tool_type == "bash" + + +def test_xml_direct_distinct_name_opener_flood_is_fast(): + # Distinct unclosed tag names (`...`) defeat per-name memoization; + # the scan must still stay near-linear instead of searching the suffix once + # per new name. + evil = "" + "".join(f"" for i in range(45000)) + blocks, dt = _timed(parse_tool_blocks, evil) + assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s" + assert blocks == [] + + +def test_tool_code_param_distinct_name_flood_is_fast(): + # Same distinct-name flood inside tool_code args, reaching the param backref + # scan in _parse_tool_code_block. + args_flood = "tool => 'bash', args => " + "".join(f"" for i in range(45000)) + _, dt = _timed(_parse_tool_code_block, args_flood) + assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"