fix(security): prevent ReDoS in XML and args tool-call parsers (#4941)

* fix(security): prevent ReDoS in XML and args tool-call parsers Four py/polynomial-redos sinks in tool_parsing.py ran lazy/greedy regexes over untrusted model output (tool-call markup is attacker-influenced via prompt injection). When the closing delimiter was absent, each rescanned to end-of-string from every opener -> O(n^2): - args => { ... } in _parse_tool_call_block: greedy \{([\s\S]*)\} restarted from every `args:{` opener. Now finds the opener once and takes through the last `}` (rfind) — equivalent capture, O(n). - _XML_INVOKE_RE: lazy <invoke ...>([\s\S]*?)</invoke>. Now _iter_xml_invoke pairs each opener with the first reachable </invoke> and stops when none is. - _XML_DIRECT_TOOL_RE and the <tag>([\s\S]*?)</\1> param scan in _parse_tool_code_block: lazy backreference patterns. Now _iter_backref_blocks pairs each opener with the nearest matching closer and memoizes tag names with no remaining closer, so an opener flood stays O(n). All four are output-equivalent to the originals on well-formed tool-call markup; the lazy patterns remain defined (still re-exported via agent_tools) but no longer drive a finditer over untrusted text. Adds tests/test_redos_xml_tool_parsers.py pinning correctness and bounding the opener-flood inputs (old paths took 4-15s). * fix(security): harden invoke-parameter and distinct-name tag scans Forward-only the two residual ReDoS paths in the XML/tool parsers that the outer-delimiter fix left quadratic: - _parse_xml_invoke parsed <parameter> with _XML_PARAM_RE.finditer, so a closed <invoke> body full of unclosed <parameter> openers rescanned the body from every opener (O(n^2), ~11s at 8k openers). Now scans forward-only via _iter_named_blocks, factored out of _iter_xml_invoke. - _iter_backref_blocks only memoized repeated missing tag names; a flood of distinct unclosed names searched the suffix once per name (O(n^2)). It now indexes every closer by name in one linear pass and binary-searches per opener (O(n log n)). Covers the direct and tool_code backref scans. Output-equivalent to the prior scanners (200k randomized trials match the memoized version for both the direct ci=True and tool_code ci=False configs). Adds regressions for the closed-invoke parameter flood and the distinct-name floods (45k openers now run in ~0.05s, were 5-6s).
2026-06-28 07:35:27 -04:00 · 2026-06-27 15:42:55 -07:00
parent df9907c09f
commit fbe3a0d73b
2 changed files with 315 additions and 28 deletions
@@ -6,6 +6,7 @@ Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style <invoke> blocks.
 """

 import ast
+import bisect
 import json
 import logging
 import re
@@ -70,6 +71,27 @@ _XML_DIRECT_TOOL_RE = re.compile(
    r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
    re.IGNORECASE,
 )
+# Forward-only delimiters for the lazy XML patterns above, so untrusted "many
+# openers, no closer" model output can't drive finditer's O(n^2) lazy rescan
+# (CodeQL py/polynomial-redos). Consumed by _iter_xml_invoke / _iter_xml_direct.
+_XML_INVOKE_OPEN_RE = re.compile(r'<invoke\s+name=["\'](\w+)["\']>\s*', re.IGNORECASE)
+_XML_INVOKE_CLOSE_RE = re.compile(r'</invoke>', re.IGNORECASE)
+_XML_DIRECT_OPEN_RE = re.compile(r"<\s*([A-Za-z_][\w-]*)\s*>", re.IGNORECASE)
+# Split <parameter ...>...</parameter> delimiters: the parameter scan inside an
+# invoke body is forward-only too, so a closed invoke stuffed with unclosed
+# parameter openers can't drive finditer's O(n^2) rescan. See _iter_named_blocks.
+_XML_PARAM_OPEN_RE = re.compile(r'<parameter\s+name=["\'](\w+)["\']>', re.IGNORECASE)
+_XML_PARAM_CLOSE_RE = re.compile(r'</parameter>', re.IGNORECASE)
+# Closer tokens (any tag name) for the backref scanners, pre-indexed by name so a
+# flood of distinct unclosed tag names stays near-linear. See _iter_backref_blocks.
+_XML_DIRECT_CLOSE_ANY_RE = re.compile(r"</\s*([A-Za-z_][\w-]*)\s*>", re.IGNORECASE)
+# `args => { ... }` opener (its closer is the last `}`, found with rfind) and the
+# `<tag>` opener for tool_code XML params — both split out of greedy/backref
+# patterns that finditer would otherwise rescan from every opener. See
+# _parse_tool_call_block / _parse_tool_code_block.
+_ARGS_BRACE_OPEN_RE = re.compile(r'args\s*(?:=>|:|=)\s*\{')
+_TOOL_CODE_PARAM_OPEN_RE = re.compile(r"<(\w+)>")
+_TOOL_CODE_PARAM_CLOSE_ANY_RE = re.compile(r"</(\w+)>")

 # Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
 #   <｜tool▁calls▁begin｜> ... <｜tool▁calls▁end｜>
@@ -507,11 +529,15 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]:
        if cmd_match:
            content = cmd_match.group(1)

-    # Pattern: args => {content} — extract everything inside the nested braces
+    # Pattern: args => {content} — extract everything inside the nested braces.
+    # Find the opener, then take through the LAST `}` (rfind). Equivalent to the
+    # greedy `\{([\s\S]*)\}` capture, but the bounded opener + rfind avoids
+    # finditer rescanning from every `args:{` opener (CodeQL py/polynomial-redos).
    if not content:
-        args_match = re.search(r'args\s*(?:=>|:|=)\s*\{([\s\S]*)\}', raw, re.DOTALL)
-        if args_match:
-            inner = args_match.group(1).strip()
+        am = _ARGS_BRACE_OPEN_RE.search(raw)
+        close = raw.rfind('}')
+        if am and close >= am.end():
+            inner = raw[am.end():close].strip()
            # Strip quotes and key prefixes
            inner = re.sub(r'^--?\w+\s+', '', inner)
            inner = inner.strip('\'"')
@@ -539,8 +565,8 @@ def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]:
    return None


-def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
-    """Parse an <invoke name="tool"><parameter ...>...</parameter></invoke> match.
+def _parse_xml_invoke(name, body) -> Optional[ToolBlock]:
+    """Parse an <invoke name="tool"><parameter ...>...</parameter></invoke> call.

    Delegates content-shaping to function_call_to_tool_block — the SAME
    converter used for native function calls — so the full tool set (every
@@ -555,17 +581,16 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
    # (e.g. <invoke name="Bash">) and function_call_to_tool_block matches
    # case-sensitively against the lowercase _TOOL_NAME_MAP / TOOL_TAGS, so a
    # raw capitalized name would be silently dropped.
-    tool_name = inv_match.group(1).lower()
-    body = inv_match.group(2)
+    tool_name = name.lower()
    params = {}
-    for pm in _XML_PARAM_RE.finditer(body):
-        params[pm.group(1)] = pm.group(2).strip()
+    for pname, pval in _iter_named_blocks(body, _XML_PARAM_OPEN_RE, _XML_PARAM_CLOSE_RE):
+        params[pname] = pval.strip()
    # Local import to avoid a circular import at module load.
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(tool_name, json.dumps(params))


-def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
+def _parse_xml_direct_tool(name, body) -> Optional[ToolBlock]:
    """Parse direct XML tool tags inside <tool_call>.

    Some local models emit:
@@ -575,13 +600,13 @@ def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.
    """
-    tool_name = tool_match.group(1).lower().replace("-", "_")
+    tool_name = name.lower().replace("-", "_")
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
-    body = tool_match.group(2).strip()
+    body = body.strip()
    if not body:
        return None
    try:
@@ -716,10 +741,12 @@ def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
    args_match = re.search(r"args\s*=>\s*['\"]?\s*([\s\S]*?)\s*['\"]?\s*$", raw, re.DOTALL)
    args_body = args_match.group(1).strip().strip("'\"") if args_match else ""

-    # Parse XML params inside args (e.g. <command>ls</command>)
+    # Parse XML params inside args (e.g. <command>ls</command>). Forward-only
+    # backref scan so a `<x><x>...` opener flood can't drive the O(n^2) lazy
+    # rescan (CodeQL py/polynomial-redos); see _iter_backref_blocks.
    xml_params = {}
-    for pm in re.finditer(r"<(\w+)>([\s\S]*?)</\1>", args_body):
-        xml_params[pm.group(1)] = pm.group(2).strip()
+    for pname, pval in _iter_backref_blocks(args_body, _TOOL_CODE_PARAM_OPEN_RE, _TOOL_CODE_PARAM_CLOSE_ANY_RE):
+        xml_params[pname] = pval.strip()

    # When the model gave structured params, hand them to the canonical
    # converter (same as native calls + <invoke>) so the full tool set and
@@ -800,6 +827,69 @@ def _strip_delimited(text: str, open_re, close_re) -> str:
    return "".join(out)


+def _iter_named_blocks(text, open_re, close_re):
+    """Forward-only equivalent of ``open_re([\\s\\S]*?)close_re`` finditer where
+    open_re captures a name in group 1: yield ``(name, body)``, pairing each
+    opener with the first ``close_re`` after it. O(n) once no closer is reachable
+    from an opener, no later opener has one either (see _iter_delimited), so
+    untrusted opener floods can't drive the lazy O(n^2) rescan."""
+    pos = 0
+    while True:
+        om = open_re.search(text, pos)
+        if om is None:
+            return
+        cm = close_re.search(text, om.end())
+        if cm is None:
+            return
+        yield om.group(1), text[om.end():cm.start()]
+        pos = cm.end()
+
+
+def _iter_xml_invoke(text):
+    """Forward-only ``<invoke name="..">...</invoke>`` scan (see _iter_named_blocks)."""
+    return _iter_named_blocks(text, _XML_INVOKE_OPEN_RE, _XML_INVOKE_CLOSE_RE)
+
+
+def _iter_backref_blocks(text, open_re, close_any_re, ci=False):
+    """Forward-only equivalent of an ``<tag>([\\s\\S]*?)</tag>`` backreference
+    finditer (same-name open/close): yield ``(name, body)``, pairing each opener
+    with the nearest following matching closer and skipping an opener whose
+    closer is unreachable.
+
+    Every closer is indexed by tag name in one linear pass, then each opener
+    binary-searches its own name's closer positions. A flood of distinct unclosed
+    tag names therefore stays O(n log n) rather than the lazy backref's O(n^2)
+    suffix rescan (CodeQL py/polynomial-redos); per-name memoization alone left
+    that distinct-name case quadratic. ``close_any_re`` matches ANY closer and
+    captures its tag name in group 1; ``ci`` lowercases names for matching, since
+    the original backref closer is case-insensitive under re.IGNORECASE."""
+    norm = (lambda s: s.lower()) if ci else (lambda s: s)
+    closer_starts = {}
+    closer_ends = {}
+    for cm in close_any_re.finditer(text):
+        k = norm(cm.group(1))
+        closer_starts.setdefault(k, []).append(cm.start())
+        closer_ends.setdefault(k, []).append(cm.end())
+    om = open_re.search(text)
+    while om is not None:
+        name = om.group(1)
+        k = norm(name)
+        resume = om.end()
+        starts = closer_starts.get(k)
+        if starts:
+            i = bisect.bisect_left(starts, om.end())
+            if i < len(starts):
+                yield name, text[om.end():starts[i]]
+                resume = closer_ends[k][i]
+        om = open_re.search(text, resume)
+
+
+def _iter_xml_direct(text):
+    """Forward-only equivalent of ``_XML_DIRECT_TOOL_RE.finditer`` (see
+    _iter_backref_blocks)."""
+    return _iter_backref_blocks(text, _XML_DIRECT_OPEN_RE, _XML_DIRECT_CLOSE_ANY_RE, ci=True)
+
+
 def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
    """Extract executable tool blocks from LLM response text.

@@ -840,8 +930,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
            # If a code block's content is an <invoke> XML call (some models wrap
            # tool calls in ```python or ```xml fences), parse the invoke instead.
            if '<invoke' in content:
-                for inv in _XML_INVOKE_RE.finditer(content):
-                    block = _parse_xml_invoke(inv)
+                for inv_name, inv_body in _iter_xml_invoke(content):
+                    block = _parse_xml_invoke(inv_name, inv_body)
                    if block:
                        blocks.append(block)
                # This fenced block is <invoke> markup, not literal code. Whether or
@@ -882,13 +972,13 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
            text, _XML_TOOL_CALL_OPEN_RE, _XML_TOOL_CALL_CLOSE_RE
        ):
            body = text[inner_start:inner_end]
-            for inv in _XML_INVOKE_RE.finditer(body):
-                block = _parse_xml_invoke(inv)
+            for inv_name, inv_body in _iter_xml_invoke(body):
+                block = _parse_xml_invoke(inv_name, inv_body)
                if block:
                    blocks.append(block)
            if not blocks:
-                for direct in _XML_DIRECT_TOOL_RE.finditer(body):
-                    block = _parse_xml_direct_tool(direct)
+                for d_name, d_body in _iter_xml_direct(body):
+                    block = _parse_xml_direct_tool(d_name, d_body)
                    if block:
                        blocks.append(block)
        # Some local models stream an opening <tool_call> wrapper and a
@@ -896,20 +986,20 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
        if not blocks:
            for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
                body = m.group(1)
-                for inv in _XML_INVOKE_RE.finditer(body):
-                    block = _parse_xml_invoke(inv)
+                for inv_name, inv_body in _iter_xml_invoke(body):
+                    block = _parse_xml_invoke(inv_name, inv_body)
                    if block:
                        blocks.append(block)
                if blocks:
                    break
-                for direct in _XML_DIRECT_TOOL_RE.finditer(body):
-                    block = _parse_xml_direct_tool(direct)
+                for d_name, d_body in _iter_xml_direct(body):
+                    block = _parse_xml_direct_tool(d_name, d_body)
                    if block:
                        blocks.append(block)
        # Try bare <invoke> without wrapper
        if not blocks:
-            for inv in _XML_INVOKE_RE.finditer(text):
-                block = _parse_xml_invoke(inv)
+            for inv_name, inv_body in _iter_xml_invoke(text):
+                block = _parse_xml_invoke(inv_name, inv_body)
                if block:
                    blocks.append(block)

@@ -0,0 +1,197 @@
+"""Regression tests for the remaining ReDoS sinks in tool_parsing.py.
+
+A previous fix (test_redos_llm_parsers.py) hardened the delimiter-bounded
+[TOOL_CALL]/<tool_call>/<tool_code> scanners but explicitly left four patterns
+that CodeQL (py/polynomial-redos) flagged on the next rescan:
+
+  * `args => { ... }` in `_parse_tool_call_block` — greedy `\\{([\\s\\S]*)\\}`
+    that `re.search` restarts from every `args:{` opener -> O(n^2).
+  * `_XML_INVOKE_RE` — lazy `<invoke ...>([\\s\\S]*?)</invoke>` that rescans to
+    end-of-string from every opener when no `</invoke>` follows.
+  * `_XML_DIRECT_TOOL_RE` and the `<tag>([\\s\\S]*?)</\\1>` param scan in
+    `_parse_tool_code_block` — lazy *backreference* patterns with the same
+    opener-flood blowup.
+
+These run over untrusted model output (tool-call markup is attacker-influenced
+via prompt injection), so each is now a forward-only scan. The tests pin:
+  * correctness is unchanged for legitimate tool-call markup, and
+  * pathological "many openers, no closer" inputs complete promptly.
+
+The timing bound is loose (seconds) so it never flakes on a slow CI box; the
+unguarded patterns took 2-15s on these inputs, so the margin is ~100x.
+"""
+
+import time
+
+import pytest
+
+import src.agent_tools  # noqa: F401  (break agent_tools<->tool_parsing import cycle)
+from src.tool_parsing import (
+    parse_tool_blocks,
+    strip_tool_blocks,
+    _parse_tool_call_block,
+    _parse_tool_code_block,
+)
+
+_BUDGET_S = 4.0
+
+
+def _timed(fn, *args):
+    start = time.perf_counter()
+    result = fn(*args)
+    return result, time.perf_counter() - start
+
+
+# ── correctness is preserved ────────────────────────────────────────────────
+
+def test_xml_invoke_call_still_parsed():
+    blocks = parse_tool_blocks(
+        '<tool_call><invoke name="bash"><parameter name="command">ls -la</parameter></invoke></tool_call>'
+    )
+    assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")]
+
+
+def test_xml_direct_tool_still_parsed():
+    blocks = parse_tool_blocks('<tool_call><web_search>weather today</web_search></tool_call>')
+    assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "weather today")]
+
+
+def test_xml_direct_tool_backref_is_case_insensitive():
+    # `</\\1>` matched case-insensitively under re.IGNORECASE; the forward-only
+    # scanner preserves that (mixed-case closer still pairs with its opener).
+    blocks = parse_tool_blocks('<tool_call><Web_Search>q</WEB_SEARCH></tool_call>')
+    assert [(b.tool_type, b.content) for b in blocks] == [("web_search", "q")]
+
+
+def test_tool_code_xml_params_still_parsed():
+    blocks = parse_tool_blocks("<tool_code>{tool => 'bash', args => '<command>ls -la</command>'}</tool_code>")
+    assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls -la")]
+
+
+def test_xml_invoke_multiple_parameters_still_parsed():
+    # The invoke parameter scan is forward-only; a well-formed invoke with more
+    # than one <parameter> must still yield every name/value pair.
+    blocks = parse_tool_blocks(
+        '<tool_call><invoke name="web_search">'
+        '<parameter name="query">rust traits</parameter>'
+        '<parameter name="time_filter">week</parameter>'
+        '</invoke></tool_call>'
+    )
+    assert len(blocks) == 1
+    assert blocks[0].tool_type == "web_search"
+    assert '"query": "rust traits"' in blocks[0].content
+    assert '"time_filter": "week"' in blocks[0].content
+
+
+def test_xml_direct_distinct_tag_names_still_parsed():
+    # Distinct sibling tags inside <tool_call> each pair with their own closer;
+    # the forward-only direct scan must keep matching after the first block.
+    blocks = parse_tool_blocks(
+        '<tool_call><web_search>weather</web_search><read_file>notes.txt</read_file></tool_call>'
+    )
+    assert [(b.tool_type, b.content) for b in blocks] == [
+        ("web_search", "weather"),
+        ("read_file", "notes.txt"),
+    ]
+
+
+def test_tool_call_args_brace_still_parsed():
+    blocks = parse_tool_blocks('[TOOL_CALL]{tool => "shell", args => {--command "ls"}}[/TOOL_CALL]')
+    assert [(b.tool_type, b.content) for b in blocks] == [("bash", "ls")]
+
+
+def test_args_brace_takes_through_last_close_brace():
+    # `\\{([\\s\\S]*)\\}` is greedy to the LAST `}`; the rfind-based rewrite must
+    # match that (keep the nested object intact, not stop at the first `}`).
+    block = _parse_tool_call_block('tool => "bash", args => {--command "echo {x} done"}')
+    assert block is not None and block.tool_type == "bash"
+    assert block.content == "echo {x} done"
+
+
+def test_fenced_invoke_still_parsed():
+    blocks = parse_tool_blocks(
+        '```python\n<invoke name="bash"><parameter name="command">whoami</parameter></invoke>\n```'
+    )
+    assert [(b.tool_type, b.content) for b in blocks] == [("bash", "whoami")]
+
+
+# ── pathological inputs no longer blow up ───────────────────────────────────
+
+def test_args_brace_opener_flood_is_fast():
+    # Many `args:{` openers, no closing `}` — old greedy capture restarted from
+    # every opener (>10s); the bounded opener + rfind is O(n).
+    evil = "args:{{a" * 14000
+    block, dt = _timed(_parse_tool_call_block, evil)
+    assert dt < _BUDGET_S, f"_parse_tool_call_block took {dt:.2f}s"
+    assert block is None
+    # And through the public path, wrapped in a [TOOL_CALL] block.
+    _, dt2 = _timed(parse_tool_blocks, "[TOOL_CALL]{" + evil + "}[/TOOL_CALL]")
+    assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"
+
+
+def test_xml_invoke_opener_flood_is_fast():
+    # Bare <invoke> opener flood, no </invoke> closer.
+    evil = ('<invoke name="x">' + "a" * 10) * 6000
+    blocks, dt = _timed(parse_tool_blocks, evil)
+    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
+    assert blocks == []
+
+
+def test_xml_invoke_stale_closer_before_opener_flood_is_fast():
+    # A lone leading </invoke> satisfies a substring guard, but no opener after
+    # it has a reachable closer.
+    evil = "</invoke>" + ('<invoke name="x">' + "a" * 10) * 6000
+    _, dt = _timed(parse_tool_blocks, evil)
+    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
+
+
+def test_xml_direct_backref_opener_flood_is_fast():
+    # <tool_call> wrapper (no </tool_call>) routes into the open-wrapper path,
+    # which reaches the _XML_DIRECT_TOOL_RE backreference scan: a `<a><a>...`
+    # flood with no `</a>` closer.
+    evil = "<tool_call>" + "<a><a>b" * 6000
+    blocks, dt = _timed(parse_tool_blocks, evil)
+    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
+    assert blocks == []
+
+
+def test_tool_code_param_backref_flood_is_fast():
+    # `<x><x>...` param flood inside tool_code args, no `</x>` closer — exercises
+    # the `<tag>([\\s\\S]*?)</\\1>` backreference scan in _parse_tool_code_block.
+    args_flood = "tool => 'bash', args => " + "<x><x>a" * 6000
+    block, dt = _timed(_parse_tool_code_block, args_flood)
+    assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"
+    # Through the public path, inside a closed <tool_code> block.
+    _, dt2 = _timed(parse_tool_blocks, "<tool_code>{" + args_flood + "}</tool_code>")
+    assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"
+
+
+def test_xml_invoke_closed_with_parameter_opener_flood_is_fast():
+    # A CLOSED <invoke> whose body is a flood of `<parameter name=..>` openers
+    # with no `</parameter>` closer: the invoke delimiter pairs fine, but the
+    # inner parameter scan must not rescan the body from every opener (O(n^2)).
+    evil = ('<tool_call><invoke name="bash">'
+            + '<parameter name="x">' * 6000
+            + '</invoke></tool_call>')
+    blocks, dt = _timed(parse_tool_blocks, evil)
+    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
+    # No `</parameter>` ever closes, so no params are captured.
+    assert len(blocks) == 1 and blocks[0].tool_type == "bash"
+
+
+def test_xml_direct_distinct_name_opener_flood_is_fast():
+    # Distinct unclosed tag names (`<t0><t1>...`) defeat per-name memoization;
+    # the scan must still stay near-linear instead of searching the suffix once
+    # per new name.
+    evil = "<tool_call>" + "".join(f"<t{i}>" for i in range(45000))
+    blocks, dt = _timed(parse_tool_blocks, evil)
+    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
+    assert blocks == []
+
+
+def test_tool_code_param_distinct_name_flood_is_fast():
+    # Same distinct-name flood inside tool_code args, reaching the param backref
+    # scan in _parse_tool_code_block.
+    args_flood = "tool => 'bash', args => " + "".join(f"<t{i}>" for i in range(45000))
+    _, dt = _timed(_parse_tool_code_block, args_flood)
+    assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"