mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-18 10:45:31 -04:00
fix(agent): index api_call so RAG tool selection can retrieve it (#3923)
* fix(agent): index api_call so RAG tool selection can retrieve it api_call exists in FUNCTION_TOOL_SCHEMAS and the agent's system prompt advertises configured API integrations, but the tool had no entry in BUILTIN_TOOL_DESCRIPTIONS. RAG tool selection embeds those descriptions and retrieves the top-K per message, so a tool without one can never be selected: the agent claims it can call Home Assistant/Miniflux/Gitea/etc. and then never receives the api_call schema (unless the Personal Assistant ASSISTANT_ALWAYS_AVAILABLE path applies). Add a retrieval-rich description for api_call, plus an ast-based parity test asserting every FUNCTION_TOOL_SCHEMAS tool has an index description so the next added tool cannot silently drift the same way. Fixes #3794 * fix(agent): route API-integration intent to api_call at selection time Addresses review (RaresKeY) on #3923: indexing api_call in the ToolIndex description was necessary but not sufficient — the #3794 repro ('Use the api_call tool to call Home Assistant GET /api/states') matched no domain in _classify_agent_request, classified as low-signal, so the agent loop skipped retrieval entirely and the schema filter sent only ALWAYS_AVAILABLE (manage_memory/ask_user/update_plan). api_call never reached the model. - _classify_agent_request: detect API-integration intent (api_call, integration(s), Home Assistant/Miniflux/Gitea/Linkding/Jellyfin) -> new 'integrations' domain, so the turn is no longer low-signal. - _DOMAIN_TOOL_MAP['integrations'] = {api_call}: deterministically seeds api_call into relevant tools after retrieval, independent of embeddings. - _DOMAIN_RULES['integrations']: rule pack (required — _domain_rules_for_tools indexes _DOMAIN_RULES[domain] directly). - tool_index _KEYWORD_HINTS: parity hint for the retrieval / keyword-fallback paths. - Regression drives the real classifier -> domain-map -> FUNCTION_TOOL_SCHEMAS filter chain and asserts api_call is advertised for the #3794 prompt. 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
f70db19cc6
commit
b51d83b16d
@@ -267,6 +267,10 @@ _DOMAIN_RULES = {
|
||||
- Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
|
||||
- Use `manage_contact` to list, add, update, or delete contacts in the address book.
|
||||
- Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
|
||||
"integrations": """\
|
||||
## Integration/API rules
|
||||
- To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
|
||||
- Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
|
||||
}
|
||||
|
||||
_DOMAIN_TOOL_MAP = {
|
||||
@@ -280,6 +284,7 @@ _DOMAIN_TOOL_MAP = {
|
||||
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
|
||||
"settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
|
||||
"contacts": {"resolve_contact", "manage_contact"},
|
||||
"integrations": {"api_call"},
|
||||
}
|
||||
|
||||
def _domain_rules_for_tools(tool_names: set) -> list[str]:
|
||||
@@ -815,6 +820,15 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
|
||||
domains.add("settings")
|
||||
if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
|
||||
domains.add("contacts")
|
||||
# API-integration intent — calling a configured service via the api_call
|
||||
# tool. Without this the #3794 repro ("Use the api_call tool to call Home
|
||||
# Assistant GET /api/states") matched no domain, classified as low-signal,
|
||||
# and the tool never reached the schema filter. Detect it explicitly so the
|
||||
# "integrations" domain seeds api_call deterministically (see
|
||||
# _DOMAIN_TOOL_MAP), independent of embedding retrieval.
|
||||
if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
|
||||
r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
|
||||
domains.add("integrations")
|
||||
|
||||
low_signal = not continuation and not domains
|
||||
return {
|
||||
|
||||
@@ -94,6 +94,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
|
||||
"manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
|
||||
"manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
|
||||
"manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
|
||||
"api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
|
||||
"manage_tokens": "API token management: list, create, or delete API access tokens.",
|
||||
"manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
|
||||
"manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
|
||||
@@ -414,6 +415,14 @@ class ToolIndex:
|
||||
"my settings", "change setting", "change a setting", "set setting",
|
||||
"preference", "preferences", "configure"}):
|
||||
{"manage_settings", "ui_control"},
|
||||
# API-integration intent → the api_call tool. Mirrors the agent-loop
|
||||
# "integrations" domain so api_call still surfaces on the retrieval and
|
||||
# keyword-fallback paths (not just the deterministic domain seed) when a
|
||||
# user names a connected service.
|
||||
frozenset({"api_call", "api call", "integration", "integrations",
|
||||
"home assistant", "homeassistant", "miniflux", "gitea",
|
||||
"linkding", "jellyfin"}):
|
||||
{"api_call"},
|
||||
# Managing EXISTING research in the Library — open/read/find/delete.
|
||||
frozenset({"my research", "the research", "research on", "open research",
|
||||
"read research", "find research", "delete research",
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
"""Regression: api_call reaches the model for API-integration intent (#3794).
|
||||
|
||||
The repro prompt — "Use the api_call tool to call Home Assistant GET
|
||||
/api/states" — matched no domain in ``_classify_agent_request``, so it was
|
||||
treated as low-signal. The agent loop then skipped retrieval and the function
|
||||
schema filter sent only the always-available tools (manage_memory / ask_user /
|
||||
update_plan); ``api_call`` was never advertised to the model even though the
|
||||
ToolIndex description existed. Adding the registry description alone did not fix
|
||||
runtime selection.
|
||||
|
||||
These tests drive the real path the agent uses — classifier -> domain tool map
|
||||
(relevant tools) -> FUNCTION_TOOL_SCHEMAS filter — using the actual functions and
|
||||
constants, so they would fail on the pre-fix code (empty domains -> low-signal ->
|
||||
no api_call). They skip locally when the agent's heavy deps (httpx/embeddings)
|
||||
are absent, and run in CI where they are installed.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
# Import the module under test; pytest.importorskip skips this whole test
# module when src.agent_loop (or one of its dependencies) cannot be imported.
agent_loop = pytest.importorskip("src.agent_loop")
|
||||
|
||||
# The repro prompt from issue #3794 that the tests below feed to the classifier.
REPRO = "Use the api_call tool to call Home Assistant GET /api/states"
|
||||
|
||||
|
||||
def _selected_tools(domains):
    """Return the union of the tool sets mapped to each entry of ``domains``.

    Mirrors agent_loop's deterministic domain seeding (the loop over
    ``_intent['domains']`` that updates ``_relevant_tools`` from
    ``_DOMAIN_TOOL_MAP``). A domain with no entry in the map contributes
    nothing.
    """
    domain_map = agent_loop._DOMAIN_TOOL_MAP
    per_domain = (domain_map.get(name, set()) for name in domains)
    return set().union(*per_domain)
|
||||
|
||||
|
||||
def _schema_names_sent(tools):
    """Return the names of FUNCTION_TOOL_SCHEMAS entries that appear in ``tools``.

    Mirrors the api-model schema filter that keeps only selected tools.
    """
    advertised = set()
    for schema in agent_loop.FUNCTION_TOOL_SCHEMAS:
        tool_name = schema.get("function", {}).get("name")
        if tool_name in tools:
            advertised.add(tool_name)
    return advertised
|
||||
|
||||
|
||||
@pytest.mark.parametrize("prompt", [
    REPRO,
    "check my home assistant lights",
    "fetch the latest unread from miniflux via the api_call tool",
    "call my gitea integration to list repos",
])
def test_integration_prompts_are_not_low_signal(prompt):
    """Integration-style prompts are classified into the 'integrations' domain."""
    classified = agent_loop._classify_agent_request([], prompt)
    # The classifier must not treat the turn as low-signal ...
    assert classified["low_signal"] is False, classified
    # ... and must tag it with the integrations domain.
    assert "integrations" in classified["domains"], classified
|
||||
|
||||
|
||||
def test_repro_selects_and_sends_api_call_schema():
    """The #3794 repro prompt ends with api_call among the schemas sent."""
    domains = agent_loop._classify_agent_request([], REPRO)["domains"]
    chosen = _selected_tools(domains)
    assert "api_call" in chosen, chosen
    # Being selected is not enough: the schema filter must actually advertise
    # api_call to the model.
    sent = _schema_names_sent(chosen)
    assert "api_call" in sent, "api_call schema must reach the model"
|
||||
|
||||
|
||||
def test_integrations_domain_has_a_rule_pack():
    """Selecting api_call yields at least one rule text that mentions it."""
    # _domain_rules_for_tools indexes _DOMAIN_RULES[domain] directly, so a
    # domain listed in _DOMAIN_TOOL_MAP without a matching _DOMAIN_RULES entry
    # would raise KeyError as soon as api_call is selected.
    rules = agent_loop._domain_rules_for_tools({"api_call"})
    mentioning_api_call = [rule for rule in rules if "api_call" in rule]
    assert mentioning_api_call, rules
|
||||
|
||||
|
||||
def test_plain_greeting_does_not_pull_api_call():
    """An unrelated greeting must not select the integrations domain or api_call."""
    # Over-matching guard: small talk should not drag the integration tool
    # into context.
    greeting = "hey there, how are you"
    classified = agent_loop._classify_agent_request([], greeting)
    assert "integrations" not in classified["domains"], classified
    seeded = _selected_tools(classified["domains"])
    assert "api_call" not in seeded
|
||||
@@ -0,0 +1,56 @@
|
||||
"""Every FUNCTION_TOOL_SCHEMAS tool must have a ToolIndex description.
|
||||
|
||||
Agent mode selects tools by embedding BUILTIN_TOOL_DESCRIPTIONS and
|
||||
retrieving the top-K per message. A tool that exists in tool_schemas but has
|
||||
no description entry can never be retrieved, so the agent advertises the
|
||||
capability (e.g. API integrations in the system prompt) while the schema is
|
||||
never actually sent to the model. api_call was missing exactly this way.
|
||||
|
||||
Parsed with ast instead of importing, so the test does not pull in the
|
||||
embedding/ChromaDB stack.
|
||||
"""
|
||||
import ast
|
||||
import os
|
||||
|
||||
# Parent of the directory that contains this test file; the helpers below read
# src/tool_schemas.py and src/tool_index.py relative to it.
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
|
||||
def _assigned_value(tree, name):
    """Return the value node of the top-level assignment to ``name``.

    Walks the top-level statements of ``tree`` (a parsed module) in order and
    returns the right-hand-side node of the first ``name = ...`` or
    ``name: T = ...`` statement found.

    Bare annotations (``name: T`` with no value) are skipped: their
    ``AnnAssign.value`` is ``None``, and returning that would hide a real
    assignment further down and make callers fail later with a confusing
    error from ``ast.literal_eval``.

    Raises:
        AssertionError: if no top-level assignment to ``name`` has a value.
    """
    for node in tree.body:
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            targets = [node.target]
        else:
            continue
        if any(isinstance(t, ast.Name) and t.id == name for t in targets):
            return node.value
    raise AssertionError(f"{name} assignment not found")
|
||||
|
||||
|
||||
def _schema_tool_names():
    """Return the set of tool names declared in FUNCTION_TOOL_SCHEMAS.

    Parses ``src/tool_schemas.py`` under ROOT with ``ast`` rather than
    importing it, then literal-evaluates the FUNCTION_TOOL_SCHEMAS value and
    collects each item's ``["function"]["name"]``.
    """
    path = os.path.join(ROOT, "src", "tool_schemas.py")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path, encoding="utf-8") as handle:
        tree = ast.parse(handle.read())
    value = _assigned_value(tree, "FUNCTION_TOOL_SCHEMAS")
    return {item["function"]["name"] for item in ast.literal_eval(value)}
|
||||
|
||||
|
||||
def _indexed_tool_names():
    """Return the set of keys of the BUILTIN_TOOL_DESCRIPTIONS dict literal.

    Parses ``src/tool_index.py`` under ROOT with ``ast`` rather than importing
    it, and literal-evaluates each key node of the dict assigned to
    BUILTIN_TOOL_DESCRIPTIONS.
    """
    path = os.path.join(ROOT, "src", "tool_index.py")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path, encoding="utf-8") as handle:
        tree = ast.parse(handle.read())
    value = _assigned_value(tree, "BUILTIN_TOOL_DESCRIPTIONS")
    return {ast.literal_eval(key) for key in value.keys}
|
||||
|
||||
|
||||
def test_every_schema_tool_has_an_index_description():
    """Every FUNCTION_TOOL_SCHEMAS tool name is a BUILTIN_TOOL_DESCRIPTIONS key."""
    schema_names = _schema_tool_names()
    indexed_names = _indexed_tool_names()
    unindexed = schema_names.difference(indexed_names)
    assert not unindexed, (
        "Tools defined in FUNCTION_TOOL_SCHEMAS but absent from "
        f"BUILTIN_TOOL_DESCRIPTIONS (RAG can never select them): {sorted(unindexed)}"
    )
|
||||
|
||||
|
||||
def test_api_call_is_indexed_with_a_real_description():
    """api_call has a BUILTIN_TOOL_DESCRIPTIONS entry longer than 50 characters.

    The dict literal is read from ``src/tool_index.py`` under ROOT via ``ast``
    rather than by importing the module.
    """
    path = os.path.join(ROOT, "src", "tool_index.py")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(path, encoding="utf-8") as handle:
        tree = ast.parse(handle.read())
    value = _assigned_value(tree, "BUILTIN_TOOL_DESCRIPTIONS")
    descriptions = {
        ast.literal_eval(k): ast.literal_eval(v)
        for k, v in zip(value.keys, value.values)
    }
    assert "api_call" in descriptions
    # A placeholder-length string would not count as a real description.
    assert len(descriptions["api_call"]) > 50
|
||||
Reference in New Issue
Block a user