diff --git a/src/tool_parsing.py b/src/tool_parsing.py index b31e114f9..1ed997903 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -354,14 +354,15 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: # If a code block's content is an XML call (some models wrap # tool calls in ```python or ```xml fences), parse the invoke instead. if ' markup, not literal code. Whether or + # not any call converted, never fall through to append the raw XML as + # a python/bash block — e.g. a hyphenated/namespaced tool name that + # _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code. + continue blocks.append(ToolBlock(tag, content)) # Pattern 2: [TOOL_CALL] blocks (only if no fenced blocks found) diff --git a/tests/test_fenced_invoke_no_raw_xml.py b/tests/test_fenced_invoke_no_raw_xml.py new file mode 100644 index 000000000..c23cb648e --- /dev/null +++ b/tests/test_fenced_invoke_no_raw_xml.py @@ -0,0 +1,36 @@ +"""Issue #2925 — a fenced ```python/```bash block wrapping an call that +can't be converted (e.g. a hyphenated/namespaced tool name that _XML_INVOKE_RE's +\\w+ won't match, or an unknown tool) must NOT fall through and ship the raw XML +to the code executor as if it were python/bash. +""" +import sys +from unittest.mock import MagicMock + +for mod in ['src.agent_tools', 'src.tool_parsing', 'src.tool_schemas', 'src.tool_execution']: + sys.modules.pop(mod, None) +for mod in [ + 'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative', + 'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression', + 'src.database', 'core.models', 'core.database', 'core.auth' +]: + if mod not in sys.modules: + sys.modules[mod] = MagicMock() + +import src.agent_tools # noqa: E402, F401 +from src.tool_parsing import parse_tool_blocks # noqa: E402 + + +def test_unconvertible_invoke_in_fence_is_not_executed_as_code(): + text = '```python\n\n1\n\n```' + blocks = parse_tool_blocks(text) + # the hyphenated name can't match _XML_INVOKE_RE, so nothing converts — + # the raw XML must not be appended as a python/bash code block. + assert not any( + b.tool_type in ("python", "bash") and ") still works. + blocks = parse_tool_blocks('```python\nprint("hi")\n```') + assert any(b.tool_type == "python" and 'print("hi")' in b.content for b in blocks), blocks