mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-24 05:35:31 -04:00
fix(agent): parse misfenced read_file calls (#4799)
This commit is contained in:
+84
-1
@@ -308,6 +308,88 @@ def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]:
|
|||||||
return ToolBlock("web_fetch", url)
|
return ToolBlock("web_fetch", url)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_misfenced_read_file_lookup(content: str, *, allow_shell_style: bool = False) -> Optional[ToolBlock]:
    """Recover simple read_file calls wrapped in python/bash fences.

    Two spellings are recognised:

    * A lone Python-style call such as ``read_file("a.md", offset=3)``.
      At most one positional argument (the path) is accepted; keywords are
      limited to ``path``/``file``/``file_path`` (all stored as ``path``)
      plus ``offset`` and ``limit``. Any unknown keyword, non-literal path,
      or invalid ``offset``/``limit`` rejects the whole call.
    * When *allow_shell_style* is true, a single command line such as
      ``read_file a.md``, ``read_file "a.md"`` or
      ``read_file {"path": "a.md", "offset": 1}``. In the JSON form, invalid
      ``offset``/``limit`` values are dropped rather than rejecting the line.

    Args:
        content: Text found inside the fence.
        allow_shell_style: Also accept the one-line command form.

    Returns:
        A read_file tool block, or None when the content is not a simple
        read_file request. Once the content parses as a bare function call,
        the shell-style form is never tried, even if the call is rejected.
    """
    stripped = content.strip()
    if not stripped:
        return None

    try:
        module = ast.parse(stripped, mode="exec")
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: source containing null bytes on some Python versions.
        # RecursionError: pathologically nested input. Either way the text
        # is simply not a Python call we can rescue.
        module = None

    call = None
    if module is not None and len(module.body) == 1 and isinstance(module.body[0], ast.Expr):
        candidate = module.body[0].value
        if isinstance(candidate, ast.Call) and isinstance(candidate.func, ast.Name):
            call = candidate

    if call is not None:
        if call.func.id.lower() != "read_file" or len(call.args) > 1:
            return None
        args = {}
        if call.args:
            path = _literal_string(call.args[0])
            if not path:
                return None
            args["path"] = path
        allowed = {"path", "file", "file_path", "offset", "limit"}
        for keyword in call.keywords:
            # keyword.arg is None for ``**kwargs``; that is rejected here too.
            if keyword.arg not in allowed:
                return None
            if keyword.arg in ("path", "file", "file_path"):
                path = _literal_string(keyword.value)
                if not path:
                    return None
                # A path-like keyword overrides any earlier path value.
                args["path"] = path
                continue
            try:
                value = ast.literal_eval(keyword.value)
            except (ValueError, SyntaxError, TypeError):
                return None
            # bool is a subclass of int, so exclude it explicitly; otherwise
            # ``offset=True`` would be forwarded as a number.
            if isinstance(value, bool) or not isinstance(value, int) or value < 0:
                return None
            args[keyword.arg] = value
        if not args.get("path"):
            return None
        # Imported at the use site, as in the original code.
        from src.tool_schemas import function_call_to_tool_block
        return function_call_to_tool_block("read_file", json.dumps(args))

    if not allow_shell_style:
        return None

    # Only a single non-blank line qualifies; multi-line content is left to
    # the caller.
    lines = [line.strip() for line in stripped.splitlines() if line.strip()]
    if len(lines) != 1:
        return None
    match = re.fullmatch(r"read_file\s+(.+)", lines[0], re.IGNORECASE)
    if not match:
        return None
    path = match.group(1).strip()
    if not path:
        return None

    if path.startswith("{"):
        # ``read_file {...}``: the argument is a JSON object of parameters.
        try:
            args = json.loads(path)
        except json.JSONDecodeError:
            return None
        if not isinstance(args, dict):
            return None
        normalized = {}
        raw_path = args.get("path") or args.get("file") or args.get("file_path")
        if isinstance(raw_path, str) and raw_path.strip():
            normalized["path"] = raw_path.strip()
        for key in ("offset", "limit"):
            value = args.get(key)
            # JSON ``true``/``false`` decode to bool, which is an int
            # subclass; drop them like any other invalid value.
            if isinstance(value, int) and not isinstance(value, bool) and value >= 0:
                normalized[key] = value
        if not normalized.get("path"):
            return None
        from src.tool_schemas import function_call_to_tool_block
        return function_call_to_tool_block("read_file", json.dumps(normalized))

    # Strip one pair of matching surrounding quotes, if present.
    if len(path) >= 2 and path[0] == path[-1] and path[0] in "'\"":
        path = path[1:-1].strip()
    if not path:
        return None
    return ToolBlock("read_file", path)
|
||||||
|
|
||||||
|
|
||||||
def _coerce_raw_web_query(value) -> Optional[str]:
|
def _coerce_raw_web_query(value) -> Optional[str]:
|
||||||
if isinstance(value, str) and value.strip():
|
if isinstance(value, str) and value.strip():
|
||||||
return value.strip()
|
return value.strip()
|
||||||
@@ -704,7 +786,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
|
|||||||
# _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code.
|
# _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code.
|
||||||
continue
|
continue
|
||||||
if tag in ("python", "bash"):
|
if tag in ("python", "bash"):
|
||||||
block = _parse_misfenced_web_lookup(content)
|
block = (_parse_misfenced_web_lookup(content)
|
||||||
|
or _parse_misfenced_read_file_lookup(content, allow_shell_style=(tag == "bash")))
|
||||||
if block:
|
if block:
|
||||||
blocks.append(block)
|
blocks.append(block)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
import src.agent_tools # noqa: F401 (break agent_tools<->tool_parsing import cycle)
|
||||||
|
from src.tool_parsing import parse_tool_blocks, strip_tool_blocks
|
||||||
|
|
||||||
|
|
||||||
|
def test_bash_fenced_read_file_function_call_runs_as_read_file():
    """A bash fence holding a Python-style read_file(...) call yields read_file."""
    fenced = '```bash\nread_file("notes/todo.md")\n```'
    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    rescued = parsed[0]
    assert rescued.tool_type == "read_file"
    assert rescued.content == "notes/todo.md"
|
||||||
|
|
||||||
|
|
||||||
|
def test_python_fenced_read_file_function_call_runs_as_read_file():
    """A python fence with keyword arguments yields read_file with JSON content."""
    fenced = '```python\nread_file(path="notes/todo.md", offset=3, limit=2)\n```'
    expected_args = {
        "path": "notes/todo.md",
        "offset": 3,
        "limit": 2,
    }

    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    rescued = parsed[0]
    assert rescued.tool_type == "read_file"
    assert json.loads(rescued.content) == expected_args
|
||||||
|
|
||||||
|
|
||||||
|
def test_bash_fenced_read_file_command_runs_as_read_file():
    """A shell-style ``read_file "<path>"`` line in a bash fence yields read_file."""
    fenced = '```bash\nread_file "notes/todo.md"\n```'
    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    rescued = parsed[0]
    assert rescued.tool_type == "read_file"
    assert rescued.content == "notes/todo.md"
|
||||||
|
|
||||||
|
|
||||||
|
def test_bash_fenced_read_file_json_command_runs_as_read_file():
    """A shell-style read_file line with a JSON argument object yields read_file."""
    fenced = '```bash\nread_file {"path":"notes/todo.md","offset":1,"limit":4}\n```'
    expected_args = {
        "path": "notes/todo.md",
        "offset": 1,
        "limit": 4,
    }

    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    rescued = parsed[0]
    assert rescued.tool_type == "read_file"
    assert json.loads(rescued.content) == expected_args
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiline_bash_read_file_block_stays_bash():
    """A bash fence with more than one command is left as a bash block."""
    fenced = '```bash\nread_file notes/todo.md\necho done\n```'
    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    untouched = parsed[0]
    assert untouched.tool_type == "bash"
    assert "read_file notes/todo.md" in untouched.content
|
||||||
|
|
||||||
|
|
||||||
|
def test_nontrivial_python_read_file_name_stays_python_code():
    """A read_file call nested inside other Python code stays a python block."""
    fenced = '```python\nprint(read_file("notes/todo.md"))\n```'
    parsed = parse_tool_blocks(fenced)

    assert len(parsed) == 1
    untouched = parsed[0]
    assert untouched.tool_type == "python"
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_tool_blocks_removes_rescued_read_file_fence():
    """Stripping removes the rescued fence but keeps the surrounding prose."""
    message = 'Opening file:\n```bash\nread_file "notes/todo.md"\n```\nDone.'

    remaining = strip_tool_blocks(message)

    # The fence markers and the command itself must be gone...
    assert "```" not in remaining
    assert "read_file" not in remaining
    # ...while the text on either side of the fence survives.
    assert "Opening file:" in remaining
    assert "Done." in remaining
|
||||||
Reference in New Issue
Block a user