def _is_non_negative_int(value) -> bool:
    """Return True only for a genuine integer (not a bool) that is >= 0."""
    # bool is a subclass of int, so a plain isinstance(value, int) check would
    # let True/False through as an offset or limit.
    return isinstance(value, int) and not isinstance(value, bool) and value >= 0


def _parse_misfenced_read_file_lookup(content: str, *, allow_shell_style: bool = False) -> Optional[ToolBlock]:
    """Recover simple read_file calls wrapped in python/bash fences.

    Two spellings are recognised:

    * Call form (any fence): the whole content is a single expression
      ``read_file(...)`` with at most one positional argument and only the
      keywords ``path``/``file``/``file_path``/``offset``/``limit``.
    * Shell form (only when ``allow_shell_style`` is true): a single
      non-blank line ``read_file <arg>`` where ``<arg>`` is either a JSON
      object or a bare/quoted path.

    Args:
        content: Raw text found inside the fence.
        allow_shell_style: Also accept the ``read_file <arg>`` command form.

    Returns:
        A ``read_file`` ToolBlock when the content is unambiguously a single
        read_file request, otherwise ``None`` so the caller can treat the
        fence as ordinary code.
    """
    stripped = content.strip()
    if not stripped:
        return None

    try:
        module = ast.parse(stripped, mode="exec")
    except (SyntaxError, ValueError):
        # ValueError covers source containing NUL bytes on interpreters that
        # do not report it as SyntaxError; either way it is not a Python call.
        module = None
    if module and len(module.body) == 1 and isinstance(module.body[0], ast.Expr):
        call = module.body[0].value
        if isinstance(call, ast.Call) and isinstance(call.func, ast.Name):
            if call.func.id.lower() != "read_file" or len(call.args) > 1:
                return None
            args = {}
            if call.args:
                # NOTE(review): _literal_string is defined elsewhere; it is
                # presumably falsy for anything but a non-empty string literal.
                path = _literal_string(call.args[0])
                if not path:
                    return None
                args["path"] = path
            allowed = {"path", "file", "file_path", "offset", "limit"}
            for keyword in call.keywords:
                # keyword.arg is None for **kwargs, which is rejected here too.
                if keyword.arg not in allowed:
                    return None
                # "file" and "file_path" are accepted as aliases of "path".
                key = "path" if keyword.arg in ("file", "file_path") else keyword.arg
                if key == "path":
                    path = _literal_string(keyword.value)
                    if not path:
                        return None
                    args["path"] = path
                    continue
                try:
                    value = ast.literal_eval(keyword.value)
                except (ValueError, SyntaxError, TypeError):
                    return None
                if not _is_non_negative_int(value):
                    return None
                args[key] = value
            if not args.get("path"):
                return None
            # Imported lazily, as in the original code, rather than at module top.
            from src.tool_schemas import function_call_to_tool_block
            return function_call_to_tool_block("read_file", json.dumps(args))

    if not allow_shell_style:
        return None
    lines = [line.strip() for line in stripped.splitlines() if line.strip()]
    # Anything beyond a single command line is left alone as real shell code.
    if len(lines) != 1:
        return None
    match = re.fullmatch(r"read_file\s+(.+)", lines[0], re.IGNORECASE)
    if not match:
        return None
    path = match.group(1).strip()
    if not path:
        return None
    if path.startswith("{"):
        # JSON-object argument: read_file {"path": "...", "offset": 1}
        try:
            args = json.loads(path)
        except json.JSONDecodeError:
            return None
        if not isinstance(args, dict):
            return None
        normalized = {}
        raw_path = args.get("path") or args.get("file") or args.get("file_path")
        if isinstance(raw_path, str) and raw_path.strip():
            normalized["path"] = raw_path.strip()
        for key in ("offset", "limit"):
            value = args.get(key)
            # Invalid offsets/limits are dropped rather than rejecting the call.
            if _is_non_negative_int(value):
                normalized[key] = value
        if not normalized.get("path"):
            return None
        from src.tool_schemas import function_call_to_tool_block
        return function_call_to_tool_block("read_file", json.dumps(normalized))
    # Bare path, optionally wrapped in one pair of matching quotes.
    if len(path) >= 2 and path[0] == path[-1] and path[0] in "'\"":
        path = path[1:-1].strip()
    if not path:
        return None
    return ToolBlock("read_file", path)
import json

import src.agent_tools  # noqa: F401 (break agent_tools<->tool_parsing import cycle)
from src.tool_parsing import parse_tool_blocks, strip_tool_blocks


def _only_block(text):
    """Parse *text* and return its single tool block, asserting there is exactly one."""
    parsed = parse_tool_blocks(text)
    assert len(parsed) == 1
    return parsed[0]


def test_bash_fenced_read_file_function_call_runs_as_read_file():
    """A python-style call inside a bash fence is rescued as read_file."""
    block = _only_block('```bash\nread_file("notes/todo.md")\n```')

    assert block.tool_type == "read_file"
    assert block.content == "notes/todo.md"


def test_python_fenced_read_file_function_call_runs_as_read_file():
    """Keyword arguments in a python fence are carried through as JSON."""
    block = _only_block('```python\nread_file(path="notes/todo.md", offset=3, limit=2)\n```')

    expected = {"path": "notes/todo.md", "offset": 3, "limit": 2}
    assert block.tool_type == "read_file"
    assert json.loads(block.content) == expected


def test_bash_fenced_read_file_command_runs_as_read_file():
    """The shell-style command with a quoted path is rescued as read_file."""
    block = _only_block('```bash\nread_file "notes/todo.md"\n```')

    assert block.tool_type == "read_file"
    assert block.content == "notes/todo.md"


def test_bash_fenced_read_file_json_command_runs_as_read_file():
    """The shell-style command with a JSON argument keeps offset and limit."""
    block = _only_block('```bash\nread_file {"path":"notes/todo.md","offset":1,"limit":4}\n```')

    expected = {"path": "notes/todo.md", "offset": 1, "limit": 4}
    assert block.tool_type == "read_file"
    assert json.loads(block.content) == expected


def test_multiline_bash_read_file_block_stays_bash():
    """A fence with more than one command is left as ordinary bash."""
    block = _only_block('```bash\nread_file notes/todo.md\necho done\n```')

    assert block.tool_type == "bash"
    assert "read_file notes/todo.md" in block.content


def test_nontrivial_python_read_file_name_stays_python_code():
    """read_file nested inside another call is not rescued."""
    block = _only_block('```python\nprint(read_file("notes/todo.md"))\n```')

    assert block.tool_type == "python"


def test_strip_tool_blocks_removes_rescued_read_file_fence():
    """Stripping removes the rescued fence but keeps the surrounding prose."""
    text = 'Opening file:\n```bash\nread_file "notes/todo.md"\n```\nDone.'

    cleaned = strip_tool_blocks(text)

    for removed in ("```", "read_file"):
        assert removed not in cleaned
    for kept in ("Opening file:", "Done."):
        assert kept in cleaned