From 33edc40eae862127d85dbf92522a7699d30b0045 Mon Sep 17 00:00:00 2001 From: Nicholai Date: Sat, 6 Jun 2026 03:46:31 -0600 Subject: [PATCH] fix: route misfenced web lookups to web tools Fixes #3067 --- src/agent_loop.py | 11 ++- src/tool_index.py | 6 +- src/tool_parsing.py | 105 ++++++++++++++++++++++++- tests/test_fenced_invoke_no_raw_xml.py | 36 +++++++++ 4 files changed, 151 insertions(+), 7 deletions(-) diff --git a/src/agent_loop.py b/src/agent_loop.py index 9a56c0201..f3bab9d41 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -67,6 +67,7 @@ The block executes automatically and you see the output.""" _AGENT_RULES = """\ ## Rules - Only use tools when needed. Don't search for things you already know. +- For web lookup/search/latest/current requests, use `web_search` or `web_fetch`. Do NOT use `bash`, `python`, `curl`, `requests`, or scraping code for web lookup unless web tools are disabled or already failed. - These exact tags execute automatically. For showing code examples, use ```shell, ```sh, ```py, etc. instead. - Multiple tool blocks per response OK. 60s timeout per tool, 10K char output limit. - Code/content >15 lines → ```create_document (NOT in chat). Short snippets OK in chat. @@ -113,6 +114,7 @@ _API_AGENT_RULES = """\ - Prefer native tool/function calling when tools are needed. - Only call tools when they materially help answer the request. - You MUST use tools to take action — do not describe what you would do. Act, don't narrate. +- For web lookup/search/latest/current requests, call `web_search` or `web_fetch`. Do NOT use shell, Python, curl, requests, or scraping code for web lookup unless web tools are unavailable or already failed. - Keep answers concise unless the user asks for depth. - For long code or content, use document tools instead of pasting large blocks into chat. - Editing an existing document: ALWAYS use `edit_document` with find/replace. Only use `update_document` for genuine full rewrites (>50% changed) — do NOT echo the entire file back for small edits. @@ -176,7 +178,8 @@ TOOL_SECTIONS = { ```bash ``` -Run any shell command. Output is returned to you. Use for: installing packages, checking files, git, curl, system info, etc. +Run any shell command. Output is returned to you. Use for: installing packages, checking files, git, system info, process management, etc. +Do NOT use bash/curl for web lookup/search/latest/current requests when `web_search` or `web_fetch` is available. NEVER use bash to create or change files — no `>`/`>>` redirects, no heredocs (`cat > f << 'EOF'`), no `tee`, `sed -i`, `awk -i`, no `python -c` that writes. To CREATE or fully rewrite a file use `write_file`; to change part of an existing file use `edit_file`. Those show a diff and are the ONLY allowed way to write files. (bash is for read-only inspection: `ls`, `cat` to READ, `grep`, `git status`/`git diff`, builds, installs.) For LONG-running commands (package installs, pip/npm, ffmpeg, model downloads, training, builds — anything that may take more than ~20s), make the FIRST line `#!bg` to run it in the BACKGROUND. You get a job id back immediately and are automatically re-invoked with the full output when it finishes — so you never block the chat waiting. Example: ```bash @@ -190,7 +193,8 @@ NEVER pipe multi-line Python through `python -c "..."` — shell quoting eats re ```python ``` -Execute Python code. Use for computation, data processing, scripting. NOT for writing code for the user (use create_document for that). Same sandbox limits as bash — no TTY, no GUI, no `input()`; for anything the user should interact with, generate a single HTML file with inline JS instead.""", +Execute Python code. Use for computation, data processing, scripting. NOT for writing code for the user (use create_document for that). Same sandbox limits as bash — no TTY, no GUI, no `input()`; for anything the user should interact with, generate a single HTML file with inline JS instead. +Do NOT use Python/requests for web lookup/search/latest/current requests when `web_search` or `web_fetch` is available.""", "web_search": """\ ```web_search @@ -200,7 +204,8 @@ Or with JSON for fresh news: ```web_search {"query": "", "time_filter": "day"} ``` -Search the web for a SINGLE quick fact/lookup mid-task. For news / "today" / "latest" queries, pass `time_filter` ("day", "week", "month", or "year"). NOT for "research X" / "do research on X" / "look into X" requests — those mean a multi-source DEEP RESEARCH job: use `trigger_research` instead (it runs in the Deep Research sidebar and produces a full report). web_search = one quick query; trigger_research = a researched report.""", +Search the web for a SINGLE quick fact/lookup mid-task. For news / "today" / "latest" queries, pass `time_filter` ("day", "week", "month", or "year"). NOT for "research X" / "do research on X" / "look into X" requests — those mean a multi-source DEEP RESEARCH job: use `trigger_research` instead (it runs in the Deep Research sidebar and produces a full report). web_search = one quick query; trigger_research = a researched report. +Use this instead of `bash`, `curl`, `python`, `requests`, or scraping code for web lookup/search/latest/current requests.""", "web_fetch": """\ ```web_fetch diff --git a/src/tool_index.py b/src/tool_index.py index f6acd0828..b7a703571 100644 --- a/src/tool_index.py +++ b/src/tool_index.py @@ -90,9 +90,9 @@ COLLECTION_NAME = "odysseus_tool_index" # Each tool gets a searchable description that helps retrieval. # These are richer than the system prompt one-liners — they're for embedding. BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { - "bash": "Run shell commands on the server. Install packages, check files, git operations, curl, system info, process management, networking.", - "python": "Execute Python code for computation, data processing, math, scripting, parsing, API calls. Not for writing code for the user.", - "web_search": "Quick single web lookup for a fact, current event, or doc mid-task. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", + "bash": "Run shell commands on the server. Install packages, check files, git operations, system info, and process management. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "python": "Execute Python code for computation, data processing, math, scripting, and parsing. Not for writing code for the user. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "web_search": "Quick single web lookup for a fact, current event, latest/current information, or doc mid-task. Use this instead of bash/curl/python/requests for web searches. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", "web_fetch": "Fetch and read the text content of a specific URL/website the user names (e.g. 'check example.com', 'open this link'). Use when you have a concrete URL; for open-ended lookups use web_search instead.", "read_file": "Read a file from disk and return its contents. View source code, config files, logs. Supports an optional line range (offset/limit) for large files.", "grep": "Search file CONTENTS for a regex across a directory tree (ripgrep-backed, honours .gitignore). Returns file:line:match. Use to find where code/symbols/strings live — prefer over bash grep.", diff --git a/src/tool_parsing.py b/src/tool_parsing.py index 1ed997903..4d2d8e66b 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -5,9 +5,10 @@ Regex-based parsing of tool invocations from LLM response text. Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style blocks. """ -import re +import ast import json import logging +import re from typing import List, Optional from src.agent_tools import ToolBlock, TOOL_TAGS @@ -176,11 +177,108 @@ _TOOL_NAME_MAP = { "todos": "manage_notes", } +_MISFENCED_WEB_TOOL_NAMES = { + "web_search": "web_search", + "websearch": "web_search", + "google_search": "web_search", + "google_search_retrieval": "web_search", + "google_search_grounding": "web_search", + "web_fetch": "web_fetch", + "webfetch": "web_fetch", + "fetch_url": "web_fetch", +} + # --------------------------------------------------------------------------- # Parsing functions # --------------------------------------------------------------------------- +def _literal_string(value) -> Optional[str]: + """Return a string from a small literal AST node, or None.""" + try: + parsed = ast.literal_eval(value) + except (ValueError, SyntaxError, TypeError): + return None + if isinstance(parsed, str): + return parsed.strip() + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, str) and item.strip(): + return item.strip() + return None + + +def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]: + """Recover simple web_search/web_fetch calls wrapped in python/bash fences. + + Some local fenced-tool models write: + + ```python + web_search("latest python release") + ``` + + That is an intended tool call, not Python code. Keep this intentionally + narrow: only a single bare function call to a known web tool alias converts. + """ + try: + module = ast.parse(content.strip(), mode="exec") + except SyntaxError: + return None + if len(module.body) != 1 or not isinstance(module.body[0], ast.Expr): + return None + call = module.body[0].value + if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name): + return None + + mapped = _MISFENCED_WEB_TOOL_NAMES.get(call.func.id.lower()) + if mapped not in ("web_search", "web_fetch"): + return None + if len(call.args) > 1: + return None + + args = {} + if call.args: + key = "url" if mapped == "web_fetch" else "query" + value = _literal_string(call.args[0]) + if not value: + return None + args[key] = value + + allowed = {"query", "queries", "url", "time_filter", "freshness", "max_pages"} + for keyword in call.keywords: + if keyword.arg not in allowed: + return None + key = "query" if keyword.arg == "queries" else keyword.arg + value = _literal_string(keyword.value) + if value is not None: + args[key] = value + continue + try: + parsed = ast.literal_eval(keyword.value) + except (ValueError, SyntaxError, TypeError): + return None + if key == "max_pages" and isinstance(parsed, int): + args[key] = parsed + continue + return None + + if mapped == "web_search": + query = args.get("query") + if not query: + return None + payload = {"query": query} + for key in ("time_filter", "freshness", "max_pages"): + if key in args: + payload[key] = args[key] + if len(payload) == 1: + return ToolBlock("web_search", query) + return ToolBlock("web_search", json.dumps(payload)) + + url = args.get("url") + if not url: + return None + return ToolBlock("web_fetch", url) + def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]: """Parse a [TOOL_CALL] block into a ToolBlock. @@ -363,6 +461,11 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: # a python/bash block — e.g. a hyphenated/namespaced tool name that # _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code. continue + if tag in ("python", "bash"): + block = _parse_misfenced_web_lookup(content) + if block: + blocks.append(block) + continue blocks.append(ToolBlock(tag, content)) # Pattern 2: [TOOL_CALL] blocks (only if no fenced blocks found) diff --git a/tests/test_fenced_invoke_no_raw_xml.py b/tests/test_fenced_invoke_no_raw_xml.py index c23cb648e..15d195eb4 100644 --- a/tests/test_fenced_invoke_no_raw_xml.py +++ b/tests/test_fenced_invoke_no_raw_xml.py @@ -34,3 +34,39 @@ def test_plain_fenced_python_block_still_parses_as_code(): # No regression: an ordinary fenced python block (no ) still works. blocks = parse_tool_blocks('```python\nprint("hi")\n```') assert any(b.tool_type == "python" and 'print("hi")' in b.content for b in blocks), blocks + + +def test_simple_web_search_call_inside_python_fence_runs_as_web_search(): + blocks = parse_tool_blocks('```python\nweb_search("latest Python release")\n```') + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert blocks[0].content == "latest Python release" + + +def test_google_search_alias_inside_bash_fence_preserves_freshness_args(): + blocks = parse_tool_blocks( + '```bash\ngoogle_search(query="Qwen latest release", freshness="week", max_pages=7)\n```' + ) + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + assert '"query": "Qwen latest release"' in blocks[0].content + assert '"freshness": "week"' in blocks[0].content + assert '"max_pages": 7' in blocks[0].content + + +def test_nontrivial_python_with_web_search_name_stays_python_code(): + blocks = parse_tool_blocks('```python\nprint(web_search("latest Python release"))\n```') + assert len(blocks) == 1 + assert blocks[0].tool_type == "python" + + +def test_plain_search_function_inside_python_fence_stays_python_code(): + blocks = parse_tool_blocks('```python\nsearch("private customer name")\n```') + assert len(blocks) == 1 + assert blocks[0].tool_type == "python" + + +def test_plain_fetch_function_inside_python_fence_stays_python_code(): + blocks = parse_tool_blocks('```python\nfetch("internal-url")\n```') + assert len(blocks) == 1 + assert blocks[0].tool_type == "python"