diff --git a/src/tool_parsing.py b/src/tool_parsing.py index 3f296c2e6..97d3f3477 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -188,6 +188,12 @@ _MISFENCED_WEB_TOOL_NAMES = { "fetch_url": "web_fetch", } +_RAW_WEB_JSON_TOOL_RE = re.compile( + r"\b(?:web_search|websearch|google_search|google_search_retrieval|google_search_grounding)\b", + re.IGNORECASE, +) +_RAW_WEB_JSON_ALLOWED_KEYS = {"query", "queries", "time_filter", "freshness", "max_pages"} + # --------------------------------------------------------------------------- # Parsing functions @@ -279,6 +285,73 @@ def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]: return None return ToolBlock("web_fetch", url) + +def _coerce_raw_web_query(value) -> Optional[str]: + if isinstance(value, str) and value.strip(): + return value.strip() + if isinstance(value, list): + for item in value: + if isinstance(item, str) and item.strip(): + return item.strip() + return None + + +def _raw_web_json_to_tool_block(payload) -> Optional[ToolBlock]: + if not isinstance(payload, dict): + return None + if set(payload) - _RAW_WEB_JSON_ALLOWED_KEYS: + return None + + query = _coerce_raw_web_query(payload.get("query")) + if not query: + query = _coerce_raw_web_query(payload.get("queries")) + if not query: + return None + + content = {"query": query} + for key in ("time_filter", "freshness"): + value = payload.get(key) + if isinstance(value, str) and value.strip().lower() in ("day", "week", "month", "year"): + content[key] = value.strip().lower() + + max_pages = payload.get("max_pages") + if isinstance(max_pages, int) and 1 <= max_pages <= 10: + content["max_pages"] = max_pages + + if len(content) == 1: + return ToolBlock("web_search", query) + return ToolBlock("web_search", json.dumps(content)) + + +def _parse_raw_web_json_lookup(text: str) -> Optional[tuple[ToolBlock, tuple[int, int]]]: + """Recover local text-model web_search calls emitted as prose + bare JSON. + + Some non-native tool models leak the intended call as: + + Need to do web_search for ... + {"query": "...", "time_filter": "week"} + + Keep this narrower than fenced/tool markup: it only runs when a known web + tool name appears shortly before a JSON object shaped like web_search args. + """ + if not isinstance(text, str): + return None + + decoder = json.JSONDecoder() + for mention in _RAW_WEB_JSON_TOOL_RE.finditer(text): + search_start = mention.end() + search_end = min(len(text), search_start + 1200) + for brace in re.finditer(r"\{", text[search_start:search_end]): + start = search_start + brace.start() + try: + parsed, end = decoder.raw_decode(text[start:]) + except json.JSONDecodeError: + continue + block = _raw_web_json_to_tool_block(parsed) + if block: + return block, (start, start + end) + return None + def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]: """Parse a [TOOL_CALL] block into a ToolBlock. @@ -436,6 +509,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: 3. XML-style / blocks 4. blocks (MiniMax-M2.5 style) 5. DeepSeek DSML markup (normalized to first) + 6. Non-native local model fallback: prose mentioning web_search followed by + bare JSON args, e.g. {"query":"...", "time_filter":"week"} `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code blocks) is not matched at all. Native function-calling models (GPT/Claude/ @@ -509,6 +584,12 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: if block: blocks.append(block) + # Pattern 6: local text-model web_search call leaked as prose + bare JSON. + if not blocks and not skip_fenced: + raw_web_json = _parse_raw_web_json_lookup(text) + if raw_web_json: + blocks.append(raw_web_json[0]) + return blocks @@ -532,6 +613,11 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str: cleaned = _TOOL_CALL_RE.sub('', cleaned) cleaned = _XML_TOOL_CALL_RE.sub('', cleaned) cleaned = _TOOL_CODE_RE.sub('', cleaned) + if not skip_fenced: + raw_web_json = _parse_raw_web_json_lookup(cleaned) + if raw_web_json: + _, (start, end) = raw_web_json + cleaned = cleaned[:start] + cleaned[end:] # Strip bare blocks not wrapped in cleaned = re.sub(r'', '', cleaned, flags=re.DOTALL | re.IGNORECASE) cleaned = re.sub(r'\n{3,}', '\n\n', cleaned) diff --git a/tests/test_web_search_raw_json_tool_call.py b/tests/test_web_search_raw_json_tool_call.py new file mode 100644 index 000000000..3c68c2ed2 --- /dev/null +++ b/tests/test_web_search_raw_json_tool_call.py @@ -0,0 +1,71 @@ +"""Local text models can leak web_search calls as prose plus bare JSON. + +gpt-oss-20b sometimes writes: + + Need to do web_search for ... + {"query":"...", "time_filter":"week"} + +That is an intended tool call in non-native/textual tool mode, but older parsing +only recognized fenced blocks, [TOOL_CALL], XML invoke, and tool_code markup. +""" +import json +import sys +from unittest.mock import MagicMock + +for mod in ['src.agent_tools', 'src.tool_parsing', 'src.tool_schemas', 'src.tool_execution']: + sys.modules.pop(mod, None) +for mod in [ + 'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative', + 'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression', + 'src.database', 'core.models', 'core.database', 'core.auth' +]: + if mod not in sys.modules: + sys.modules[mod] = MagicMock() + +import src.agent_tools # noqa: E402, F401 +from src.tool_parsing import parse_tool_blocks, strip_tool_blocks # noqa: E402 + + +def test_raw_json_after_web_search_phrase_runs_as_web_search(): + text = ( + "Need to do web_search for best chocolate chip cookies. Use web_search function.\n\n" + '{"query":"best chocolate chip cookie recipe","time_filter":"week"}' + ) + + blocks = parse_tool_blocks(text) + + assert len(blocks) == 1 + assert blocks[0].tool_type == "web_search" + payload = json.loads(blocks[0].content) + assert payload == { + "query": "best chocolate chip cookie recipe", + "time_filter": "week", + } + + +def test_raw_json_without_web_tool_name_is_ignored(): + text = 'Here is a saved search config:\n\n{"query":"private customer name"}' + + assert parse_tool_blocks(text) == [] + + +def test_raw_json_fallback_is_disabled_for_native_parser_gate(): + text = ( + "Need to do web_search for best chocolate chip cookies.\n\n" + '{"query":"best chocolate chip cookie recipe"}' + ) + + assert parse_tool_blocks(text, skip_fenced=True) == [] + + +def test_strip_tool_blocks_removes_executed_raw_json(): + text = ( + "Need to do web_search for best chocolate chip cookies. Use web_search function.\n\n" + '{"query":"best chocolate chip cookie recipe","time_filter":"week"}' + ) + + cleaned = strip_tool_blocks(text) + + assert '{"query"' not in cleaned + assert "best chocolate chip cookie recipe" not in cleaned + assert "Need to do web_search" in cleaned