fix: route misfenced web lookups to web tools

Fixes #3067
This commit is contained in:
Nicholai
2026-06-06 03:46:31 -06:00
committed by GitHub
parent e87a1ad8d2
commit 33edc40eae
4 changed files with 151 additions and 7 deletions
+104 -1
View File
@@ -5,9 +5,10 @@ Regex-based parsing of tool invocations from LLM response text.
Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style <invoke> blocks.
"""
import re
import ast
import json
import logging
import re
from typing import List, Optional
from src.agent_tools import ToolBlock, TOOL_TAGS
@@ -176,11 +177,108 @@ _TOOL_NAME_MAP = {
"todos": "manage_notes",
}
_MISFENCED_WEB_TOOL_NAMES = {
"web_search": "web_search",
"websearch": "web_search",
"google_search": "web_search",
"google_search_retrieval": "web_search",
"google_search_grounding": "web_search",
"web_fetch": "web_fetch",
"webfetch": "web_fetch",
"fetch_url": "web_fetch",
}
# ---------------------------------------------------------------------------
# Parsing functions
# ---------------------------------------------------------------------------
def _literal_string(value) -> Optional[str]:
"""Return a string from a small literal AST node, or None."""
try:
parsed = ast.literal_eval(value)
except (ValueError, SyntaxError, TypeError):
return None
if isinstance(parsed, str):
return parsed.strip()
if isinstance(parsed, list):
for item in parsed:
if isinstance(item, str) and item.strip():
return item.strip()
return None
def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]:
"""Recover simple web_search/web_fetch calls wrapped in python/bash fences.
Some local fenced-tool models write:
```python
web_search("latest python release")
```
That is an intended tool call, not Python code. Keep this intentionally
narrow: only a single bare function call to a known web tool alias converts.
"""
try:
module = ast.parse(content.strip(), mode="exec")
except SyntaxError:
return None
if len(module.body) != 1 or not isinstance(module.body[0], ast.Expr):
return None
call = module.body[0].value
if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name):
return None
mapped = _MISFENCED_WEB_TOOL_NAMES.get(call.func.id.lower())
if mapped not in ("web_search", "web_fetch"):
return None
if len(call.args) > 1:
return None
args = {}
if call.args:
key = "url" if mapped == "web_fetch" else "query"
value = _literal_string(call.args[0])
if not value:
return None
args[key] = value
allowed = {"query", "queries", "url", "time_filter", "freshness", "max_pages"}
for keyword in call.keywords:
if keyword.arg not in allowed:
return None
key = "query" if keyword.arg == "queries" else keyword.arg
value = _literal_string(keyword.value)
if value is not None:
args[key] = value
continue
try:
parsed = ast.literal_eval(keyword.value)
except (ValueError, SyntaxError, TypeError):
return None
if key == "max_pages" and isinstance(parsed, int):
args[key] = parsed
continue
return None
if mapped == "web_search":
query = args.get("query")
if not query:
return None
payload = {"query": query}
for key in ("time_filter", "freshness", "max_pages"):
if key in args:
payload[key] = args[key]
if len(payload) == 1:
return ToolBlock("web_search", query)
return ToolBlock("web_search", json.dumps(payload))
url = args.get("url")
if not url:
return None
return ToolBlock("web_fetch", url)
def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]:
"""Parse a [TOOL_CALL] block into a ToolBlock.
@@ -363,6 +461,11 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]:
# a python/bash block — e.g. a hyphenated/namespaced tool name that
# _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code.
continue
if tag in ("python", "bash"):
block = _parse_misfenced_web_lookup(content)
if block:
blocks.append(block)
continue
blocks.append(ToolBlock(tag, content))
# Pattern 2: [TOOL_CALL] blocks (only if no fenced blocks found)