fix(agent): skill-prescribed tools never reach the model's schema list (#4008)

* Agent: make skill-prescribed tools actually callable

The skill index and matched-skill procedures are injected into the
prompt, but tool selection never followed: manage_skills wasn't in the
RAG-selected schema list (so the model substituted manage_memory), and
a matched skill could prescribe tools (grep, read_file) the model had
no schema for. Now:

- manage_skills rides along whenever the owner has any skills indexed
- a Jaccard-matched skill's requires_toolsets join the selection
- viewing a skill mid-turn via manage_skills unlocks its
  requires_toolsets for subsequent rounds
- admin-intent turns send _ADMIN_TOOLS schemas, matching the prompt
  text _build_base_prompt already advertises
- index_for(active_toolsets=None) no longer hides requires_toolsets
  skills from callers that don't know the active set

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

* Agent: validate skill requires_toolsets against known tools, not TOOL_SECTIONS

grep/glob/ls ship as function schemas without a prompt-prose section,
so gating on TOOL_SECTIONS silently dropped them from a skill's
requires_toolsets.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
andrewemer
2026-06-15 06:32:43 -05:00
committed by GitHub
parent e7abb7559d
commit cd02ac7ef6
3 changed files with 191 additions and 5 deletions
+6 -4
View File
@@ -603,7 +603,6 @@ class SkillsManager:
escalation) — those are work-in-progress and pollute the
prompt with half-finished procedures.
"""
active_toolsets = active_toolsets or []
out = []
for s in self.load(owner=owner):
status = s.get("status")
@@ -617,13 +616,16 @@ class SkillsManager:
# Platform gating
if platform and s.get("platforms") and platform not in s["platforms"]:
continue
# requires_toolsets: hide unless every required toolset is active
# requires_toolsets: hide unless every required toolset is active.
# active_toolsets=None means the caller doesn't know the active
# set (API listings, chat preface) — don't gate in that case;
# only an explicit list filters.
req = s.get("requires_toolsets") or []
if req and not all(t in active_toolsets for t in req):
if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
continue
# fallback_for_toolsets: hide when any of those toolsets is active
fb = s.get("fallback_for_toolsets") or []
if fb and any(t in active_toolsets for t in fb):
if fb and active_toolsets and any(t in active_toolsets for t in fb):
continue
out.append({
"name": s["name"],
+87 -1
View File
@@ -1904,6 +1904,44 @@ async def stream_agent_loop(
if _relevant_tools is not None and active_document is not None:
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
# The skill index injected by _build_system_prompt tells the model to
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
# into the prompt as procedures to follow — but neither path goes through
# tool selection, so the model can be handed a procedure naming tools
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
# in lockstep: manage_skills is callable whenever any skill is indexed,
# and a matched skill's declared requires_toolsets ride along with it.
if not guide_only and _relevant_tools is not None:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
_skills_on = True
try:
from routes.prefs_routes import _load_for_user as _load_prefs
_skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
except Exception:
pass
_sm = SkillsManager(DATA_DIR)
_owner_skills = _sm.load(owner=owner) if _skills_on else []
if _owner_skills:
_relevant_tools.add("manage_skills")
if _retrieval_query:
# Validate against every known executable tool, not just
# TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
# schemas without a prompt-prose section.
from src.tool_policy import known_tool_names
_known = known_tool_names()
for _sk in _sm.get_relevant_skills(
_retrieval_query, skills=_owner_skills,
threshold=0.25, max_items=3,
):
_relevant_tools.update(
t for t in (_sk.get("requires_toolsets") or [])
if t in _known
)
except Exception as _e:
logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
if _relevant_tools is not None:
logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
@@ -2167,9 +2205,17 @@ async def stream_agent_loop(
elif _is_api_model:
# Filter schemas by RAG-selected tools (if available)
if _relevant_tools:
# _build_base_prompt unions _ADMIN_TOOLS into the prompt
# sections when admin intent fires — the schema list must
# offer the same names, or the model reads prose describing
# tools it cannot call and substitutes the nearest schema
# it does have (e.g. manage_memory for manage_skills).
_schema_names = set(_relevant_tools)
if _needs_admin:
_schema_names |= _ADMIN_TOOLS
base_schemas = [
s for s in FUNCTION_TOOL_SCHEMAS
if s.get("function", {}).get("name") in _relevant_tools
if s.get("function", {}).get("name") in _schema_names
]
_mcp_filtered = [
s for s in mcp_schemas
@@ -2705,6 +2751,46 @@ async def stream_agent_loop(
)
desc, result = await _tool_task
# A skill the model just loaded can prescribe tools that weren't
# RAG-selected this turn (declared via requires_toolsets in its
# frontmatter). Union them into the selection so the NEXT round's
# schema list includes them — otherwise the model reads "use
# grep" from the skill it fetched but has no grep schema to call.
if (
block.tool_type == "manage_skills"
and _relevant_tools is not None
and not result.get("error")
):
_ms_args = {}
_ms_raw = (block.content or "").strip()
if _ms_raw.startswith("{"):
try:
_ms_args = json.loads(_ms_raw)
except json.JSONDecodeError:
_ms_args = {}
_ms_name = str(_ms_args.get("name", "") or "").strip()
if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
try:
from services.memory.skills import SkillsManager as _SkM
from src.constants import DATA_DIR as _DD
from src.tool_policy import known_tool_names as _ktn
_known = _ktn()
for _sk in _SkM(_DD).load(owner=owner):
if _sk.get("name") == _ms_name:
_new = {
t for t in (_sk.get("requires_toolsets") or [])
if t in _known and t not in _relevant_tools
}
if _new:
_relevant_tools.update(_new)
logger.info(
"[tool-rag] skill '%s' unlocked tools for next round: %s",
_ms_name, sorted(_new),
)
break
except Exception as _e:
logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
# Extract structured web sources from web_search tool output.
# web_search returns {"output": ..., "exit_code": 0}; check "output"
# first so the <!-- SOURCES:…--> marker is found and stripped even
+98
View File
@@ -0,0 +1,98 @@
"""index_for() toolset gating: requires_toolsets must only filter when the
caller provides an explicit active-toolset list.
Callers that don't know the active tool set (API skill listings, the chat
preface) pass active_toolsets=None. The old behavior coerced None to [] and
hid every skill that declared requires_toolsets — so a skill like a local
notes lookup that needs grep + read_file silently vanished from the index
the moment it declared its tool needs. None now means "don't gate".
"""
import sys
from pathlib import Path
from unittest.mock import MagicMock
# ── module-load stubbing (matches other tests in this repo) ──────────
for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"):
if _mod not in sys.modules:
try:
__import__(_mod)
except ImportError:
sys.modules[_mod] = MagicMock()
from services.memory.skills import SkillsManager # noqa: E402
def _write_skill_md(skills_root: Path, name: str, *, requires: str = "",
fallback: str = "") -> Path:
skill_dir = skills_root / "general" / name
skill_dir.mkdir(parents=True, exist_ok=True)
fm = [
"---",
f"name: {name}",
"description: test skill",
"version: 1.0.0",
"category: general",
"tags: []",
]
if requires:
fm.append(f"requires_toolsets: [{requires}]")
if fallback:
fm.append(f"fallback_for_toolsets: [{fallback}]")
fm += [
"status: published",
"confidence: 0.9",
"source: learned",
"created: 2026-01-01T00:00:00Z",
"---",
"",
"## When to Use",
"- test",
"",
"## Procedure",
"1. step 1",
"",
]
path = skill_dir / "SKILL.md"
path.write_text("\n".join(fm), encoding="utf-8")
return path
def _names(idx):
return {s["name"] for s in idx}
def test_requires_toolsets_not_gated_when_active_set_unknown(tmp_path):
(tmp_path / "skills").mkdir()
_write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
sm = SkillsManager(str(tmp_path))
# None = caller doesn't know the active tool set → no gating.
assert "notes-lookup" in _names(sm.index_for())
assert "notes-lookup" in _names(sm.index_for(active_toolsets=None))
def test_requires_toolsets_gates_on_explicit_list(tmp_path):
(tmp_path / "skills").mkdir()
_write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
sm = SkillsManager(str(tmp_path))
# Explicit list missing a required tool → hidden.
assert "notes-lookup" not in _names(sm.index_for(active_toolsets=["grep"]))
assert "notes-lookup" not in _names(sm.index_for(active_toolsets=[]))
# All required tools active → visible.
assert "notes-lookup" in _names(
sm.index_for(active_toolsets=["grep", "read_file", "ls"]))
def test_fallback_for_toolsets_unaffected_by_none(tmp_path):
(tmp_path / "skills").mkdir()
_write_skill_md(tmp_path / "skills", "web-fallback", fallback="web_search")
sm = SkillsManager(str(tmp_path))
# Fallback skills hide only when the toolset they substitute for is
# known to be active.
assert "web-fallback" in _names(sm.index_for(active_toolsets=None))
assert "web-fallback" in _names(sm.index_for(active_toolsets=[]))
assert "web-fallback" not in _names(
sm.index_for(active_toolsets=["web_search"]))