From cd02ac7ef62d2f0386e67b0a5e97e5d8da643d72 Mon Sep 17 00:00:00 2001 From: andrewemer <44418992+andrewemer@users.noreply.github.com> Date: Mon, 15 Jun 2026 06:32:43 -0500 Subject: [PATCH] fix(agent): skill-prescribed tools never reach the model's schema list (#4008) * Agent: make skill-prescribed tools actually callable The skill index and matched-skill procedures are injected into the prompt, but tool selection never followed: manage_skills wasn't in the RAG-selected schema list (so the model substituted manage_memory), and a matched skill could prescribe tools (grep, read_file) the model had no schema for. Now: - manage_skills rides along whenever the owner has any skills indexed - a Jaccard-matched skill's requires_toolsets join the selection - viewing a skill mid-turn via manage_skills unlocks its requires_toolsets for subsequent rounds - admin-intent turns send _ADMIN_TOOLS schemas, matching the prompt text _build_base_prompt already advertises - index_for(active_toolsets=None) no longer hides requires_toolsets skills from callers that don't know the active set Co-Authored-By: Claude Fable 5 * Agent: validate skill requires_toolsets against known tools, not TOOL_SECTIONS grep/glob/ls ship as function schemas without a prompt-prose section, so gating on TOOL_SECTIONS silently dropped them from a skill's requires_toolsets. Co-Authored-By: Claude Fable 5 --------- Co-authored-by: Claude Fable 5 --- services/memory/skills.py | 10 ++- src/agent_loop.py | 88 ++++++++++++++++++++- tests/test_skill_index_toolset_gating.py | 98 ++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 5 deletions(-) create mode 100644 tests/test_skill_index_toolset_gating.py diff --git a/services/memory/skills.py b/services/memory/skills.py index 9cfe801e1..5baaa88c5 100644 --- a/services/memory/skills.py +++ b/services/memory/skills.py @@ -603,7 +603,6 @@ class SkillsManager: escalation) — those are work-in-progress and pollute the prompt with half-finished procedures. """ - active_toolsets = active_toolsets or [] out = [] for s in self.load(owner=owner): status = s.get("status") @@ -617,13 +616,16 @@ class SkillsManager: # Platform gating if platform and s.get("platforms") and platform not in s["platforms"]: continue - # requires_toolsets: hide unless every required toolset is active + # requires_toolsets: hide unless every required toolset is active. + # active_toolsets=None means the caller doesn't know the active + # set (API listings, chat preface) — don't gate in that case; + # only an explicit list filters. req = s.get("requires_toolsets") or [] - if req and not all(t in active_toolsets for t in req): + if req and active_toolsets is not None and not all(t in active_toolsets for t in req): continue # fallback_for_toolsets: hide when any of those toolsets is active fb = s.get("fallback_for_toolsets") or [] - if fb and any(t in active_toolsets for t in fb): + if fb and active_toolsets and any(t in active_toolsets for t in fb): continue out.append({ "name": s["name"], diff --git a/src/agent_loop.py b/src/agent_loop.py index a42ec4b2e..cd49f3e9d 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -1904,6 +1904,44 @@ async def stream_agent_loop( if _relevant_tools is not None and active_document is not None: _relevant_tools.update({"edit_document", "update_document", "suggest_document"}) + # The skill index injected by _build_system_prompt tells the model to + # call `manage_skills action=view`, and Jaccard-matched skills are pasted + # into the prompt as procedures to follow — but neither path goes through + # tool selection, so the model can be handed a procedure naming tools + # (grep, read_file, ...) that aren't in its schema list. Keep the schemas + # in lockstep: manage_skills is callable whenever any skill is indexed, + # and a matched skill's declared requires_toolsets ride along with it. + if not guide_only and _relevant_tools is not None: + try: + from services.memory.skills import SkillsManager + from src.constants import DATA_DIR + _skills_on = True + try: + from routes.prefs_routes import _load_for_user as _load_prefs + _skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True) + except Exception: + pass + _sm = SkillsManager(DATA_DIR) + _owner_skills = _sm.load(owner=owner) if _skills_on else [] + if _owner_skills: + _relevant_tools.add("manage_skills") + if _retrieval_query: + # Validate against every known executable tool, not just + # TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as + # schemas without a prompt-prose section. + from src.tool_policy import known_tool_names + _known = known_tool_names() + for _sk in _sm.get_relevant_skills( + _retrieval_query, skills=_owner_skills, + threshold=0.25, max_items=3, + ): + _relevant_tools.update( + t for t in (_sk.get("requires_toolsets") or []) + if t in _known + ) + except Exception as _e: + logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}") + if _relevant_tools is not None: logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50]) @@ -2167,9 +2205,17 @@ async def stream_agent_loop( elif _is_api_model: # Filter schemas by RAG-selected tools (if available) if _relevant_tools: + # _build_base_prompt unions _ADMIN_TOOLS into the prompt + # sections when admin intent fires — the schema list must + # offer the same names, or the model reads prose describing + # tools it cannot call and substitutes the nearest schema + # it does have (e.g. manage_memory for manage_skills). + _schema_names = set(_relevant_tools) + if _needs_admin: + _schema_names |= _ADMIN_TOOLS base_schemas = [ s for s in FUNCTION_TOOL_SCHEMAS - if s.get("function", {}).get("name") in _relevant_tools + if s.get("function", {}).get("name") in _schema_names ] _mcp_filtered = [ s for s in mcp_schemas @@ -2705,6 +2751,46 @@ async def stream_agent_loop( ) desc, result = await _tool_task + # A skill the model just loaded can prescribe tools that weren't + # RAG-selected this turn (declared via requires_toolsets in its + # frontmatter). Union them into the selection so the NEXT round's + # schema list includes them — otherwise the model reads "use + # grep" from the skill it fetched but has no grep schema to call. + if ( + block.tool_type == "manage_skills" + and _relevant_tools is not None + and not result.get("error") + ): + _ms_args = {} + _ms_raw = (block.content or "").strip() + if _ms_raw.startswith("{"): + try: + _ms_args = json.loads(_ms_raw) + except json.JSONDecodeError: + _ms_args = {} + _ms_name = str(_ms_args.get("name", "") or "").strip() + if _ms_name and _ms_args.get("action") in ("view", "view_ref"): + try: + from services.memory.skills import SkillsManager as _SkM + from src.constants import DATA_DIR as _DD + from src.tool_policy import known_tool_names as _ktn + _known = _ktn() + for _sk in _SkM(_DD).load(owner=owner): + if _sk.get("name") == _ms_name: + _new = { + t for t in (_sk.get("requires_toolsets") or []) + if t in _known and t not in _relevant_tools + } + if _new: + _relevant_tools.update(_new) + logger.info( + "[tool-rag] skill '%s' unlocked tools for next round: %s", + _ms_name, sorted(_new), + ) + break + except Exception as _e: + logger.debug(f"skill requires_toolsets unlock skipped: {_e}") + # Extract structured web sources from web_search tool output. # web_search returns {"output": ..., "exit_code": 0}; check "output" # first so the marker is found and stripped even diff --git a/tests/test_skill_index_toolset_gating.py b/tests/test_skill_index_toolset_gating.py new file mode 100644 index 000000000..e977ec926 --- /dev/null +++ b/tests/test_skill_index_toolset_gating.py @@ -0,0 +1,98 @@ +"""index_for() toolset gating: requires_toolsets must only filter when the +caller provides an explicit active-toolset list. + +Callers that don't know the active tool set (API skill listings, the chat +preface) pass active_toolsets=None. The old behavior coerced None to [] and +hid every skill that declared requires_toolsets — so a skill like a local +notes lookup that needs grep + read_file silently vanished from the index +the moment it declared its tool needs. None now means "don't gate". +""" + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +# ── module-load stubbing (matches other tests in this repo) ────────── +for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"): + if _mod not in sys.modules: + try: + __import__(_mod) + except ImportError: + sys.modules[_mod] = MagicMock() + +from services.memory.skills import SkillsManager # noqa: E402 + + +def _write_skill_md(skills_root: Path, name: str, *, requires: str = "", + fallback: str = "") -> Path: + skill_dir = skills_root / "general" / name + skill_dir.mkdir(parents=True, exist_ok=True) + fm = [ + "---", + f"name: {name}", + "description: test skill", + "version: 1.0.0", + "category: general", + "tags: []", + ] + if requires: + fm.append(f"requires_toolsets: [{requires}]") + if fallback: + fm.append(f"fallback_for_toolsets: [{fallback}]") + fm += [ + "status: published", + "confidence: 0.9", + "source: learned", + "created: 2026-01-01T00:00:00Z", + "---", + "", + "## When to Use", + "- test", + "", + "## Procedure", + "1. step 1", + "", + ] + path = skill_dir / "SKILL.md" + path.write_text("\n".join(fm), encoding="utf-8") + return path + + +def _names(idx): + return {s["name"] for s in idx} + + +def test_requires_toolsets_not_gated_when_active_set_unknown(tmp_path): + (tmp_path / "skills").mkdir() + _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file") + sm = SkillsManager(str(tmp_path)) + + # None = caller doesn't know the active tool set → no gating. + assert "notes-lookup" in _names(sm.index_for()) + assert "notes-lookup" in _names(sm.index_for(active_toolsets=None)) + + +def test_requires_toolsets_gates_on_explicit_list(tmp_path): + (tmp_path / "skills").mkdir() + _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file") + sm = SkillsManager(str(tmp_path)) + + # Explicit list missing a required tool → hidden. + assert "notes-lookup" not in _names(sm.index_for(active_toolsets=["grep"])) + assert "notes-lookup" not in _names(sm.index_for(active_toolsets=[])) + # All required tools active → visible. + assert "notes-lookup" in _names( + sm.index_for(active_toolsets=["grep", "read_file", "ls"])) + + +def test_fallback_for_toolsets_unaffected_by_none(tmp_path): + (tmp_path / "skills").mkdir() + _write_skill_md(tmp_path / "skills", "web-fallback", fallback="web_search") + sm = SkillsManager(str(tmp_path)) + + # Fallback skills hide only when the toolset they substitute for is + # known to be active. + assert "web-fallback" in _names(sm.index_for(active_toolsets=None)) + assert "web-fallback" in _names(sm.index_for(active_toolsets=[])) + assert "web-fallback" not in _names( + sm.index_for(active_toolsets=["web_search"]))