Files
odysseus/tests/test_tool_rag_keyword_hints.py
Dividesbyzer0 a07fe35936 fix(agent): honor explicit web search requests
Promote explicit web-search phrasing to tool use and keep web_search/web_fetch available for that turn even when the stale web toggle is false.
2026-06-15 15:02:10 +09:00

66 lines
2.9 KiB
Python

"""Regression for issue #1707 — the agent tool-RAG force-included the entire
email toolset on any "tell me ..." query, crowding out the relevant tools so the
model believed it only had email tools and refused web/other tasks.
Root cause: `_KEYWORD_HINTS` in src/tool_index.py listed "tell" under the email
intent, and `get_tools_for_query` force-includes a hint's tools whenever any of
its keywords appears (word-boundary match). "tell" appears in a huge fraction of
requests (the reporter's was "visit <url> and tell me the title"), so email tools
were force-included for non-email queries.
These hints are deterministic string matching — no embeddings — so we can test
`get_tools_for_query` directly with retrieval stubbed out (no ChromaDB needed).
"""
from src.tool_index import ToolIndex, ALWAYS_AVAILABLE
_EMAIL_TOOLS = {
"list_emails", "read_email", "send_email", "reply_to_email",
"bulk_email", "delete_email", "archive_email", "mark_email_read",
}
def _index_without_embeddings():
"""A ToolIndex whose retrieval returns nothing, so get_tools_for_query
exercises only the deterministic base + keyword-hint logic."""
ti = ToolIndex.__new__(ToolIndex) # skip __init__ (no ChromaDB/fastembed)
ti.retrieve = lambda query, k=8: []
return ti
def test_tell_in_web_query_does_not_force_email_tools():
"""The #1707 repro: a web request that merely contains the word 'tell' must
NOT drag in the email toolset."""
ti = _index_without_embeddings()
q = "visit https://www.youtube.com/user/PewDiePie and tell me the title of his latest video"
tools = ti.get_tools_for_query(q)
leaked = _EMAIL_TOOLS & tools
assert not leaked, f"'tell me' must not force-include email tools, got {sorted(leaked)}"
# web_search / web_fetch are always-available and must remain present.
assert "web_search" in tools and "web_fetch" in tools
def test_explicit_web_search_query_gets_web_tools_without_retrieval():
"""Explicit web-search phrasing must surface web tools even if embeddings
return nothing."""
ti = _index_without_embeddings()
tools = ti.get_tools_for_query("use web search and find a recipe for chocolate chip cookies")
assert "web_search" in tools and "web_fetch" in tools
def test_genuine_email_query_still_gets_email_tools():
"""Removing 'tell' must not break real email intent — the actual email
keywords still force-include the toolset."""
ti = _index_without_embeddings()
tools = ti.get_tools_for_query("reply to the unread email in my inbox")
assert {"reply_to_email", "send_email", "read_email"} <= tools
def test_plain_tell_request_stays_minimal():
"""A bare 'tell me a joke' must not pull in email tools either."""
ti = _index_without_embeddings()
tools = ti.get_tools_for_query("tell me a joke")
assert not (_EMAIL_TOOLS & tools)
# Always-available baseline is still there.
assert set(ALWAYS_AVAILABLE) <= tools