"""Regression tests for the remaining ReDoS sinks in tool_parsing.py.

A previous fix (test_redos_llm_parsers.py) hardened the delimiter-bounded
block scanners (`[TOOL_CALL]` and related delimiters) but explicitly left four
patterns that CodeQL (py/polynomial-redos) flagged on the next rescan:

* `args => { ... }` in `_parse_tool_call_block` — greedy `\\{([\\s\\S]*)\\}`
  that `re.search` restarts from every `args:{` opener -> O(n^2).
* `_XML_INVOKE_RE` — lazy `([\\s\\S]*?)` that rescans to end-of-string from
  every opener when no closing tag follows.
* `_XML_DIRECT_TOOL_RE` and the `([\\s\\S]*?)` param scan in
  `_parse_tool_code_block` — lazy *backreference* patterns with the same
  opener-flood blowup.

These run over untrusted model output (tool-call markup is attacker-influenced
via prompt injection), so each is now a forward-only scan. The tests pin:

* correctness is unchanged for legitimate tool-call markup, and
* pathological "many openers, no closer" inputs complete promptly.

The timing bound is loose (seconds) so it never flakes on a slow CI box; the
unguarded patterns took 2-15s on these inputs, so the margin is ~100x.
"""
import time

import pytest

import src.agent_tools  # noqa: F401 (break agent_tools<->tool_parsing import cycle)
from src.tool_parsing import (
    parse_tool_blocks,
    strip_tool_blocks,
    _parse_tool_call_block,
    _parse_tool_code_block,
)

# Wall-clock ceiling, in seconds, that every pathological-input test must beat.
_BUDGET_S = 4.0

# NOTE(review): many input literals below are empty or lack the markup that the
# accompanying descriptions talk about (openers, closers, wrappers). They are
# reproduced verbatim here; presumably angle-bracket tags were lost somewhere
# upstream of this copy — confirm against the repository version before
# relying on these inputs.


def _timed(fn, *args):
    """Call ``fn(*args)`` and return ``(result, elapsed_seconds)``."""
    began = time.perf_counter()
    outcome = fn(*args)
    elapsed = time.perf_counter() - began
    return outcome, elapsed


def _type_content_pairs(blocks):
    """Return ``(tool_type, content)`` for each parsed block, in order."""
    return [(entry.tool_type, entry.content) for entry in blocks]


# ── correctness is preserved ────────────────────────────────────────────────


def test_xml_invoke_call_still_parsed():
    """An invoke-style call must still come out as a single bash block."""
    parsed = parse_tool_blocks(
        'ls -la'
    )
    expected = [("bash", "ls -la")]
    assert _type_content_pairs(parsed) == expected


def test_xml_direct_tool_still_parsed():
    """A direct tool element must still come out as a web_search block."""
    parsed = parse_tool_blocks('weather today')
    expected = [("web_search", "weather today")]
    assert _type_content_pairs(parsed) == expected


def test_xml_direct_tool_backref_is_case_insensitive():
    """A mixed-case closer must still pair with its opener.

    The closing tag used to match case-insensitively under re.IGNORECASE; the
    forward-only scanner is expected to keep that behaviour.
    """
    parsed = parse_tool_blocks('q')
    expected = [("web_search", "q")]
    assert _type_content_pairs(parsed) == expected


def test_tool_code_xml_params_still_parsed():
    """A `{tool => ..., args => ...}` body must still yield the bash command."""
    parsed = parse_tool_blocks("{tool => 'bash', args => 'ls -la'}")
    expected = [("bash", "ls -la")]
    assert _type_content_pairs(parsed) == expected


def test_xml_invoke_multiple_parameters_still_parsed():
    """Every name/value pair of a multi-parameter invoke must be reported.

    The invoke parameter scan is forward-only, so a well-formed invoke that
    carries more than one parameter must not lose any of them.
    """
    parsed = parse_tool_blocks(
        ''
        'rust traits'
        'week'
        ''
    )
    assert len(parsed) == 1
    only = parsed[0]
    assert only.tool_type == "web_search"
    assert '"query": "rust traits"' in only.content
    assert '"time_filter": "week"' in only.content


def test_xml_direct_distinct_tag_names_still_parsed():
    """Sibling tool tags with different names must each pair with their closer.

    The forward-only direct scan has to keep matching after the first block.
    """
    parsed = parse_tool_blocks(
        'weathernotes.txt'
    )
    expected = [
        ("web_search", "weather"),
        ("read_file", "notes.txt"),
    ]
    assert _type_content_pairs(parsed) == expected


def test_tool_call_args_brace_still_parsed():
    """A [TOOL_CALL] block with brace-wrapped args must still map to bash."""
    parsed = parse_tool_blocks(
        '[TOOL_CALL]{tool => "shell", args => {--command "ls"}}[/TOOL_CALL]'
    )
    expected = [("bash", "ls")]
    assert _type_content_pairs(parsed) == expected


def test_args_brace_takes_through_last_close_brace():
    """The args capture must extend to the LAST `}`, not the first one.

    `\\{([\\s\\S]*)\\}` was greedy to the final `}`; the rfind-based rewrite
    has to match that so a nested `{x}` stays inside the captured command.
    """
    parsed_block = _parse_tool_call_block(
        'tool => "bash", args => {--command "echo {x} done"}'
    )
    assert parsed_block is not None
    assert parsed_block.tool_type == "bash"
    assert parsed_block.content == "echo {x} done"


def test_fenced_invoke_still_parsed():
    """A call wrapped in a ```python fence must still yield the bash command."""
    parsed = parse_tool_blocks(
        '```python\nwhoami\n```'
    )
    expected = [("bash", "whoami")]
    assert _type_content_pairs(parsed) == expected


# ── pathological inputs no longer blow up ───────────────────────────────────


def test_args_brace_opener_flood_is_fast():
    """Many `args:{` openers with no closing `}` must parse within budget.

    The old greedy capture restarted from every opener (>10s); the bounded
    opener + rfind approach is O(n).
    """
    payload = "args:{{a" * 14000

    parsed_block, dt = _timed(_parse_tool_call_block, payload)
    assert dt < _BUDGET_S, f"_parse_tool_call_block took {dt:.2f}s"
    assert parsed_block is None

    # Same payload through the public entry point, wrapped in a [TOOL_CALL] block.
    wrapped = "[TOOL_CALL]{" + payload + "}[/TOOL_CALL]"
    dt2 = _timed(parse_tool_blocks, wrapped)[1]
    assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"


def test_xml_invoke_opener_flood_is_fast():
    """A bare opener flood with no closer must finish fast and yield nothing."""
    payload = ('' + "a" * 10) * 6000

    parsed, dt = _timed(parse_tool_blocks, payload)
    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
    assert parsed == []


def test_xml_invoke_stale_closer_before_opener_flood_is_fast():
    """A stale leading closer must not re-enable the slow path.

    A lone closer at the start satisfies a substring guard, yet no opener
    after it has a reachable closer.
    """
    payload = "" + ('' + "a" * 10) * 6000

    dt = _timed(parse_tool_blocks, payload)[1]
    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"


def test_xml_direct_backref_opener_flood_is_fast():
    """An unclosed wrapper followed by a flood must finish fast, yielding nothing.

    An unclosed wrapper routes into the open-wrapper path, which reaches the
    _XML_DIRECT_TOOL_RE backreference scan with no matching closer available.
    """
    payload = "" + "b" * 6000

    parsed, dt = _timed(parse_tool_blocks, payload)
    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
    assert parsed == []


def test_tool_code_param_backref_flood_is_fast():
    """A param flood inside tool_code args, with no closer, must stay fast.

    Exercises the `([\\s\\S]*?)` backreference scan in _parse_tool_code_block.
    """
    args_payload = "tool => 'bash', args => " + "a" * 6000

    _unused_block, dt = _timed(_parse_tool_code_block, args_payload)
    assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"

    # Same payload through the public entry point, inside a closed block.
    wrapped = "{" + args_payload + "}"
    dt2 = _timed(parse_tool_blocks, wrapped)[1]
    assert dt2 < _BUDGET_S, f"parse_tool_blocks took {dt2:.2f}s"


def test_xml_invoke_closed_with_parameter_opener_flood_is_fast():
    """A closed invoke whose body is a parameter-opener flood must stay fast.

    The invoke delimiters pair up fine, but the inner parameter scan must not
    rescan the body from every opener (O(n^2)).
    """
    payload = ('' + '' * 6000 + '')

    parsed, dt = _timed(parse_tool_blocks, payload)
    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"

    # No parameter ever closes, so no params are captured.
    assert len(parsed) == 1
    assert parsed[0].tool_type == "bash"


def test_xml_direct_distinct_name_opener_flood_is_fast():
    """Distinct unclosed tag names must not defeat the linear-time scan.

    Unique names defeat per-name memoization; the scan must still stay
    near-linear instead of searching the suffix once per new name.
    """
    payload = "" + "".join(f"" for i in range(45000))

    parsed, dt = _timed(parse_tool_blocks, payload)
    assert dt < _BUDGET_S, f"parse_tool_blocks took {dt:.2f}s"
    assert parsed == []


def test_tool_code_param_distinct_name_flood_is_fast():
    """The distinct-name flood inside tool_code args must also stay fast.

    Reaches the param backreference scan in _parse_tool_code_block.
    """
    args_payload = "tool => 'bash', args => " + "".join(f"" for i in range(45000))

    dt = _timed(_parse_tool_code_block, args_payload)[1]
    assert dt < _BUDGET_S, f"_parse_tool_code_block took {dt:.2f}s"