fix(agent): index api_call so RAG tool selection can retrieve it (#3923)

* fix(agent): index api_call so RAG tool selection can retrieve it

  api_call exists in FUNCTION_TOOL_SCHEMAS and the agent's system prompt advertises configured API integrations, but the tool had no entry in BUILTIN_TOOL_DESCRIPTIONS. RAG tool selection embeds those descriptions and retrieves the top-K per message, so a tool without one can never be selected: the agent claims it can call Home Assistant/Miniflux/Gitea/etc. and then never receives the api_call schema (unless the Personal Assistant ASSISTANT_ALWAYS_AVAILABLE path applies).

  Add a retrieval-rich description for api_call, plus an ast-based parity test asserting every FUNCTION_TOOL_SCHEMAS tool has an index description so the next added tool cannot silently drift the same way.

  Fixes #3794

* fix(agent): route API-integration intent to api_call at selection time

  Addresses review (RaresKeY) on #3923: indexing api_call in the ToolIndex description was necessary but not sufficient — the #3794 repro ('Use the api_call tool to call Home Assistant GET /api/states') matched no domain in _classify_agent_request and was classified as low-signal, so the agent loop skipped retrieval entirely and the schema filter sent only ALWAYS_AVAILABLE (manage_memory/ask_user/update_plan). api_call never reached the model.

  - _classify_agent_request: detect API-integration intent (api_call, integration(s), Home Assistant/Miniflux/Gitea/Linkding/Jellyfin) -> new 'integrations' domain, so the turn is no longer low-signal.
  - _DOMAIN_TOOL_MAP['integrations'] = {api_call}: deterministically seeds api_call into relevant tools after retrieval, independent of embeddings.
  - _DOMAIN_RULES['integrations']: rule pack (required — _domain_rules_for_tools indexes _DOMAIN_RULES[domain] directly).
  - tool_index _KEYWORD_HINTS: parity hint for the retrieval / keyword-fallback paths.
  - Regression drives the real classifier -> domain-map -> FUNCTION_TOOL_SCHEMAS filter chain and asserts api_call is advertised for the #3794 prompt.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 05:05:24 -04:00 · 2026-06-18 11:43:25 +03:00
parent f70db19cc6
commit b51d83b16d
4 changed files with 157 additions and 0 deletions
@@ -0,0 +1,78 @@
+"""Regression: api_call reaches the model for API-integration intent (#3794).
+
+The repro prompt — "Use the api_call tool to call Home Assistant GET
+/api/states" — matched no domain in ``_classify_agent_request``, so it was
+treated as low-signal. The agent loop then skipped retrieval and the function
+schema filter sent only the always-available tools (manage_memory / ask_user /
+update_plan); ``api_call`` was never advertised to the model even though the
+ToolIndex description existed. Adding the registry description alone did not fix
+runtime selection.
+
+These tests drive the real path the agent uses — classifier -> domain tool map
+(relevant tools) -> FUNCTION_TOOL_SCHEMAS filter — using the actual functions and
+constants, so they would fail on the pre-fix code (empty domains -> low-signal ->
+no api_call). They skip locally when the agent's heavy deps (httpx/embeddings)
+are absent, and run in CI where they are installed.
+"""
+import pytest
+
+# Import the module under test; pytest skips this whole test module instead of
+# erroring when the import fails (e.g. the agent's dependencies are missing).
+agent_loop = pytest.importorskip("src.agent_loop")
+
+# The prompt from the #3794 report, shared by the tests below.
+REPRO = "Use the api_call tool to call Home Assistant GET /api/states"
+
+
+def _selected_tools(domains):
+    """Return the union of the tool names that `domains` map to.
+
+    Reproduces agent_loop's deterministic seeding step, in which each entry of
+    `_intent['domains']` adds its `_DOMAIN_TOOL_MAP` tools to `_relevant_tools`.
+    A domain with no map entry contributes nothing.
+    """
+    selected = set()
+    for name in domains:
+        mapped = agent_loop._DOMAIN_TOOL_MAP.get(name, set())
+        selected = selected | mapped
+    return selected
+
+
+def _schema_names_sent(tools):
+    """Return the names of the function schemas that pass the tool filter.
+
+    Reproduces the api-model schema filter: a FUNCTION_TOOL_SCHEMAS entry is
+    kept only when its ``function`` -> ``name`` value is one of `tools`.
+    """
+    sent = set()
+    for schema in agent_loop.FUNCTION_TOOL_SCHEMAS:
+        name = schema.get("function", {}).get("name")
+        if name in tools:
+            sent.add(name)
+    return sent
+
+
+# Prompts that express API-integration intent, starting with the #3794 repro.
+_INTEGRATION_PROMPTS = [
+    REPRO,
+    "check my home assistant lights",
+    "fetch the latest unread from miniflux via the api_call tool",
+    "call my gitea integration to list repos",
+]
+
+
+@pytest.mark.parametrize("prompt", _INTEGRATION_PROMPTS)
+def test_integration_prompts_are_not_low_signal(prompt):
+    """Integration prompts must not be low-signal and must hit 'integrations'."""
+    classification = agent_loop._classify_agent_request([], prompt)
+    # The whole classification is the assertion message to ease debugging.
+    assert classification["low_signal"] is False, classification
+    assert "integrations" in classification["domains"], classification
+
+
+def test_repro_selects_and_sends_api_call_schema():
+    """The #3794 prompt must end with api_call among the schemas sent."""
+    domains = agent_loop._classify_agent_request([], REPRO)["domains"]
+    chosen = _selected_tools(domains)
+    assert "api_call" in chosen, chosen
+    # Selection alone is not enough: the schema filter has to keep api_call too.
+    advertised = _schema_names_sent(chosen)
+    assert "api_call" in advertised, "api_call schema must reach the model"
+
+
+def test_integrations_domain_has_a_rule_pack():
+    """Selecting api_call must yield at least one rule that mentions it."""
+    # _domain_rules_for_tools looks domains up as _DOMAIN_RULES[domain], so a
+    # domain listed in _DOMAIN_TOOL_MAP but missing from _DOMAIN_RULES would
+    # raise KeyError as soon as api_call is selected.
+    rule_pack = agent_loop._domain_rules_for_tools({"api_call"})
+    mentions_api_call = any("api_call" in rule for rule in rule_pack)
+    assert mentions_api_call, rule_pack
+
+
+def test_plain_greeting_does_not_pull_api_call():
+    """Small talk must not classify as 'integrations' or select api_call."""
+    # Over-matching guard: an unrelated message should neither be tagged with
+    # the integrations domain nor drag the integration tool into context.
+    classification = agent_loop._classify_agent_request([], "hey there, how are you")
+    assert "integrations" not in classification["domains"], classification
+    chosen = _selected_tools(classification["domains"])
+    assert "api_call" not in chosen
@@ -0,0 +1,56 @@
+"""Every FUNCTION_TOOL_SCHEMAS tool must have a ToolIndex description.
+
+Agent mode selects tools by embedding BUILTIN_TOOL_DESCRIPTIONS and
+retrieving the top-K per message. A tool that exists in tool_schemas but has
+no description entry can never be retrieved, so the agent advertises the
+capability (e.g. API integrations in the system prompt) while the schema is
+never actually sent to the model. api_call was missing exactly this way.
+
+Parsed with ast instead of importing, so the test does not pull in the
+embedding/ChromaDB stack.
+"""
+import ast
+import os
+
+# Parent of the directory containing this test file. The source files parsed
+# below are looked up under ROOT/src.
+ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _assigned_value(tree, name):
+    """Return the AST node assigned to the module-level variable `name`.
+
+    Scans the top-level statements of `tree` (a parsed module) in order and
+    returns the right-hand side of the first plain (``name = ...``) or
+    annotated (``name: T = ...``) assignment to `name`.
+
+    A bare annotation (``name: T``) carries no value, so it is skipped and the
+    scan continues instead of returning ``None`` to the caller.
+
+    Raises:
+        AssertionError: if no top-level assignment with a value is found.
+    """
+    for node in tree.body:
+        if isinstance(node, ast.Assign):
+            targets = node.targets
+        elif isinstance(node, ast.AnnAssign) and node.value is not None:
+            targets = [node.target]
+        else:
+            continue
+        if any(isinstance(t, ast.Name) and t.id == name for t in targets):
+            return node.value
+    raise AssertionError(f"{name} assignment not found")
+
+
+def _schema_tool_names():
+    """Return the set of tool names declared in FUNCTION_TOOL_SCHEMAS.
+
+    Parses ``src/tool_schemas.py`` with ``ast`` rather than importing it, then
+    evaluates the assigned value with ``ast.literal_eval``, so the value must
+    be a pure literal. Each item's ``["function"]["name"]`` is collected.
+    """
+    path = os.path.join(ROOT, "src", "tool_schemas.py")
+    # Close the handle deterministically; a bare open().read() leaves it to the
+    # garbage collector and triggers ResourceWarning.
+    with open(path, encoding="utf-8") as fh:
+        tree = ast.parse(fh.read())
+    value = _assigned_value(tree, "FUNCTION_TOOL_SCHEMAS")
+    return {item["function"]["name"] for item in ast.literal_eval(value)}
+
+
+def _indexed_tool_names():
+    """Return the set of tool names that have a BUILTIN_TOOL_DESCRIPTIONS key.
+
+    Parses ``src/tool_index.py`` with ``ast`` rather than importing it and
+    reads the keys of the dict literal assigned to BUILTIN_TOOL_DESCRIPTIONS.
+    Only the keys are evaluated, so the values need not be literals.
+    """
+    path = os.path.join(ROOT, "src", "tool_index.py")
+    # Close the handle deterministically; a bare open().read() leaves it to the
+    # garbage collector and triggers ResourceWarning.
+    with open(path, encoding="utf-8") as fh:
+        tree = ast.parse(fh.read())
+    value = _assigned_value(tree, "BUILTIN_TOOL_DESCRIPTIONS")
+    # A ``**other`` entry in a dict literal is stored with a None key; it names
+    # no tool, and ast.literal_eval(None) would raise, so skip it.
+    return {ast.literal_eval(key) for key in value.keys if key is not None}
+
+
+def test_every_schema_tool_has_an_index_description():
+    """Every FUNCTION_TOOL_SCHEMAS tool needs a BUILTIN_TOOL_DESCRIPTIONS key."""
+    schema_names = _schema_tool_names()
+    indexed_names = _indexed_tool_names()
+    unindexed = schema_names - indexed_names
+    # Sorted so the failure message lists the offenders in a stable order.
+    assert not unindexed, (
+        "Tools defined in FUNCTION_TOOL_SCHEMAS but absent from "
+        f"BUILTIN_TOOL_DESCRIPTIONS (RAG can never select them): {sorted(unindexed)}"
+    )
+
+
+def test_api_call_is_indexed_with_a_real_description():
+    """api_call must have a non-trivial BUILTIN_TOOL_DESCRIPTIONS entry."""
+    path = os.path.join(ROOT, "src", "tool_index.py")
+    # Close the handle deterministically; a bare open().read() leaves it to the
+    # garbage collector and triggers ResourceWarning.
+    with open(path, encoding="utf-8") as fh:
+        tree = ast.parse(fh.read())
+    value = _assigned_value(tree, "BUILTIN_TOOL_DESCRIPTIONS")
+    # A ``**other`` entry in a dict literal is stored with a None key; skip it
+    # because ast.literal_eval(None) would raise.
+    descriptions = {
+        ast.literal_eval(k): ast.literal_eval(v)
+        for k, v in zip(value.keys, value.values)
+        if k is not None
+    }
+    assert "api_call" in descriptions
+    # More than 50 characters rules out an empty or placeholder description.
+    assert len(descriptions["api_call"]) > 50