fix(agent): skill-prescribed tools never reach the model's schema list (#4008)

* Agent: make skill-prescribed tools actually callable The skill index and matched-skill procedures are injected into the prompt, but tool selection never followed: manage_skills wasn't in the RAG-selected schema list (so the model substituted manage_memory), and a matched skill could prescribe tools (grep, read_file) the model had no schema for. Now: - manage_skills rides along whenever the owner has any skills indexed - a Jaccard-matched skill's requires_toolsets join the selection - viewing a skill mid-turn via manage_skills unlocks its requires_toolsets for subsequent rounds - admin-intent turns send _ADMIN_TOOLS schemas, matching the prompt text _build_base_prompt already advertises - index_for(active_toolsets=None) no longer hides requires_toolsets skills from callers that don't know the active set Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> * Agent: validate skill requires_toolsets against known tools, not TOOL_SECTIONS grep/glob/ls ship as function schemas without a prompt-prose section, so gating on TOOL_SECTIONS silently dropped them from a skill's requires_toolsets. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> --------- Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
2026-06-15 09:15:29 -04:00 · 2026-06-15 06:32:43 -05:00
parent e7abb7559d
commit cd02ac7ef6
3 changed files with 191 additions and 5 deletions
@@ -603,7 +603,6 @@ class SkillsManager:
        escalation) — those are work-in-progress and pollute the
        prompt with half-finished procedures.
        """
-        active_toolsets = active_toolsets or []
        out = []
        for s in self.load(owner=owner):
            status = s.get("status")
@@ -617,13 +616,16 @@ class SkillsManager:
            # Platform gating
            if platform and s.get("platforms") and platform not in s["platforms"]:
                continue
-            # requires_toolsets: hide unless every required toolset is active
+            # requires_toolsets: hide unless every required toolset is active.
+            # active_toolsets=None means the caller doesn't know the active
+            # set (API listings, chat preface) — don't gate in that case;
+            # only an explicit list filters.
            req = s.get("requires_toolsets") or []
-            if req and not all(t in active_toolsets for t in req):
+            if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
                continue
            # fallback_for_toolsets: hide when any of those toolsets is active
            fb = s.get("fallback_for_toolsets") or []
-            if fb and any(t in active_toolsets for t in fb):
+            if fb and active_toolsets and any(t in active_toolsets for t in fb):
                continue
            out.append({
                "name": s["name"],
@@ -1904,6 +1904,44 @@ async def stream_agent_loop(
    if _relevant_tools is not None and active_document is not None:
        _relevant_tools.update({"edit_document", "update_document", "suggest_document"})

+    # The skill index injected by _build_system_prompt tells the model to
+    # call `manage_skills action=view`, and Jaccard-matched skills are pasted
+    # into the prompt as procedures to follow — but neither path goes through
+    # tool selection, so the model can be handed a procedure naming tools
+    # (grep, read_file, ...) that aren't in its schema list. Keep the schemas
+    # in lockstep: manage_skills is callable whenever any skill is indexed,
+    # and a matched skill's declared requires_toolsets ride along with it.
+    if not guide_only and _relevant_tools is not None:
+        try:
+            from services.memory.skills import SkillsManager
+            from src.constants import DATA_DIR
+            _skills_on = True
+            try:
+                from routes.prefs_routes import _load_for_user as _load_prefs
+                _skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
+            except Exception:
+                pass
+            _sm = SkillsManager(DATA_DIR)
+            _owner_skills = _sm.load(owner=owner) if _skills_on else []
+            if _owner_skills:
+                _relevant_tools.add("manage_skills")
+                if _retrieval_query:
+                    # Validate against every known executable tool, not just
+                    # TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
+                    # schemas without a prompt-prose section.
+                    from src.tool_policy import known_tool_names
+                    _known = known_tool_names()
+                    for _sk in _sm.get_relevant_skills(
+                        _retrieval_query, skills=_owner_skills,
+                        threshold=0.25, max_items=3,
+                    ):
+                        _relevant_tools.update(
+                            t for t in (_sk.get("requires_toolsets") or [])
+                            if t in _known
+                        )
+        except Exception as _e:
+            logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
+
    if _relevant_tools is not None:
        logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])

@@ -2167,9 +2205,17 @@ async def stream_agent_loop(
        elif _is_api_model:
            # Filter schemas by RAG-selected tools (if available)
            if _relevant_tools:
+                # _build_base_prompt unions _ADMIN_TOOLS into the prompt
+                # sections when admin intent fires — the schema list must
+                # offer the same names, or the model reads prose describing
+                # tools it cannot call and substitutes the nearest schema
+                # it does have (e.g. manage_memory for manage_skills).
+                _schema_names = set(_relevant_tools)
+                if _needs_admin:
+                    _schema_names |= _ADMIN_TOOLS
                base_schemas = [
                    s for s in FUNCTION_TOOL_SCHEMAS
-                    if s.get("function", {}).get("name") in _relevant_tools
+                    if s.get("function", {}).get("name") in _schema_names
                ]
                _mcp_filtered = [
                    s for s in mcp_schemas
@@ -2705,6 +2751,46 @@ async def stream_agent_loop(
                    )
                desc, result = await _tool_task

+            # A skill the model just loaded can prescribe tools that weren't
+            # RAG-selected this turn (declared via requires_toolsets in its
+            # frontmatter). Union them into the selection so the NEXT round's
+            # schema list includes them — otherwise the model reads "use
+            # grep" from the skill it fetched but has no grep schema to call.
+            if (
+                block.tool_type == "manage_skills"
+                and _relevant_tools is not None
+                and not result.get("error")
+            ):
+                _ms_args = {}
+                _ms_raw = (block.content or "").strip()
+                if _ms_raw.startswith("{"):
+                    try:
+                        _ms_args = json.loads(_ms_raw)
+                    except json.JSONDecodeError:
+                        _ms_args = {}
+                _ms_name = str(_ms_args.get("name", "") or "").strip()
+                if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
+                    try:
+                        from services.memory.skills import SkillsManager as _SkM
+                        from src.constants import DATA_DIR as _DD
+                        from src.tool_policy import known_tool_names as _ktn
+                        _known = _ktn()
+                        for _sk in _SkM(_DD).load(owner=owner):
+                            if _sk.get("name") == _ms_name:
+                                _new = {
+                                    t for t in (_sk.get("requires_toolsets") or [])
+                                    if t in _known and t not in _relevant_tools
+                                }
+                                if _new:
+                                    _relevant_tools.update(_new)
+                                    logger.info(
+                                        "[tool-rag] skill '%s' unlocked tools for next round: %s",
+                                        _ms_name, sorted(_new),
+                                    )
+                                break
+                    except Exception as _e:
+                        logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
+
            # Extract structured web sources from web_search tool output.
            # web_search returns {"output": ..., "exit_code": 0}; check "output"
            # first so the <!-- SOURCES:…--> marker is found and stripped even
@@ -0,0 +1,98 @@
+"""index_for() toolset gating: requires_toolsets must only filter when the
+caller provides an explicit active-toolset list.
+
+Callers that don't know the active tool set (API skill listings, the chat
+preface) pass active_toolsets=None. The old behavior coerced None to [] and
+hid every skill that declared requires_toolsets — so a skill like a local
+notes lookup that needs grep + read_file silently vanished from the index
+the moment it declared its tool needs. None now means "don't gate".
+"""
+
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock
+
+# ── module-load stubbing (matches other tests in this repo) ──────────
+for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"):
+    if _mod not in sys.modules:
+        try:
+            __import__(_mod)
+        except ImportError:
+            sys.modules[_mod] = MagicMock()
+
+from services.memory.skills import SkillsManager  # noqa: E402
+
+
+def _write_skill_md(skills_root: Path, name: str, *, requires: str = "",
+                    fallback: str = "") -> Path:
+    skill_dir = skills_root / "general" / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    fm = [
+        "---",
+        f"name: {name}",
+        "description: test skill",
+        "version: 1.0.0",
+        "category: general",
+        "tags: []",
+    ]
+    if requires:
+        fm.append(f"requires_toolsets: [{requires}]")
+    if fallback:
+        fm.append(f"fallback_for_toolsets: [{fallback}]")
+    fm += [
+        "status: published",
+        "confidence: 0.9",
+        "source: learned",
+        "created: 2026-01-01T00:00:00Z",
+        "---",
+        "",
+        "## When to Use",
+        "- test",
+        "",
+        "## Procedure",
+        "1. step 1",
+        "",
+    ]
+    path = skill_dir / "SKILL.md"
+    path.write_text("\n".join(fm), encoding="utf-8")
+    return path
+
+
+def _names(idx):
+    return {s["name"] for s in idx}
+
+
+def test_requires_toolsets_not_gated_when_active_set_unknown(tmp_path):
+    (tmp_path / "skills").mkdir()
+    _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
+    sm = SkillsManager(str(tmp_path))
+
+    # None = caller doesn't know the active tool set → no gating.
+    assert "notes-lookup" in _names(sm.index_for())
+    assert "notes-lookup" in _names(sm.index_for(active_toolsets=None))
+
+
+def test_requires_toolsets_gates_on_explicit_list(tmp_path):
+    (tmp_path / "skills").mkdir()
+    _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
+    sm = SkillsManager(str(tmp_path))
+
+    # Explicit list missing a required tool → hidden.
+    assert "notes-lookup" not in _names(sm.index_for(active_toolsets=["grep"]))
+    assert "notes-lookup" not in _names(sm.index_for(active_toolsets=[]))
+    # All required tools active → visible.
+    assert "notes-lookup" in _names(
+        sm.index_for(active_toolsets=["grep", "read_file", "ls"]))
+
+
+def test_fallback_for_toolsets_unaffected_by_none(tmp_path):
+    (tmp_path / "skills").mkdir()
+    _write_skill_md(tmp_path / "skills", "web-fallback", fallback="web_search")
+    sm = SkillsManager(str(tmp_path))
+
+    # Fallback skills hide only when the toolset they substitute for is
+    # known to be active.
+    assert "web-fallback" in _names(sm.index_for(active_toolsets=None))
+    assert "web-fallback" in _names(sm.index_for(active_toolsets=[]))
+    assert "web-fallback" not in _names(
+        sm.index_for(active_toolsets=["web_search"]))