fix(agent): skill-prescribed tools never reach the model's schema list (#4008)

* Agent: make skill-prescribed tools actually callable The skill index and matched-skill procedures are injected into the prompt, but tool selection never followed: manage_skills wasn't in the RAG-selected schema list (so the model substituted manage_memory), and a matched skill could prescribe tools (grep, read_file) the model had no schema for. Now: - manage_skills rides along whenever the owner has any skills indexed - a Jaccard-matched skill's requires_toolsets join the selection - viewing a skill mid-turn via manage_skills unlocks its requires_toolsets for subsequent rounds - admin-intent turns send _ADMIN_TOOLS schemas, matching the prompt text _build_base_prompt already advertises - index_for(active_toolsets=None) no longer hides requires_toolsets skills from callers that don't know the active set Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> * Agent: validate skill requires_toolsets against known tools, not TOOL_SECTIONS grep/glob/ls ship as function schemas without a prompt-prose section, so gating on TOOL_SECTIONS silently dropped them from a skill's requires_toolsets. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com> --------- Co-authored-by: Claude Fable 5 <noreply@anthropic.com>
2026-06-15 17:25:26 -04:00 · 2026-06-15 06:32:43 -05:00
parent e7abb7559d
commit cd02ac7ef6
3 changed files with 191 additions and 5 deletions
@@ -603,7 +603,6 @@ class SkillsManager:
        escalation) — those are work-in-progress and pollute the
        prompt with half-finished procedures.
        """
        active_toolsets = active_toolsets or []
        out = []
        for s in self.load(owner=owner):
            status = s.get("status")
@@ -617,13 +616,16 @@ class SkillsManager:
            # Platform gating
            if platform and s.get("platforms") and platform not in s["platforms"]:
                continue
-            # requires_toolsets: hide unless every required toolset is active
+            # requires_toolsets: hide unless every required toolset is active.
            # active_toolsets=None means the caller doesn't know the active
            # set (API listings, chat preface) — don't gate in that case;
            # only an explicit list filters.
            req = s.get("requires_toolsets") or []
-            if req and not all(t in active_toolsets for t in req):
+            if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
                continue
            # fallback_for_toolsets: hide when any of those toolsets is active
            fb = s.get("fallback_for_toolsets") or []
-            if fb and any(t in active_toolsets for t in fb):
+            if fb and active_toolsets and any(t in active_toolsets for t in fb):
                continue
            out.append({
                "name": s["name"],
@@ -1904,6 +1904,44 @@ async def stream_agent_loop(
    if _relevant_tools is not None and active_document is not None:
        _relevant_tools.update({"edit_document", "update_document", "suggest_document"})
    # The skill index injected by _build_system_prompt tells the model to
    # call `manage_skills action=view`, and Jaccard-matched skills are pasted
    # into the prompt as procedures to follow — but neither path goes through
    # tool selection, so the model can be handed a procedure naming tools
    # (grep, read_file, ...) that aren't in its schema list. Keep the schemas
    # in lockstep: manage_skills is callable whenever any skill is indexed,
    # and a matched skill's declared requires_toolsets ride along with it.
    if not guide_only and _relevant_tools is not None:
        try:
            from services.memory.skills import SkillsManager
            from src.constants import DATA_DIR
            _skills_on = True
            try:
                from routes.prefs_routes import _load_for_user as _load_prefs
                _skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
            except Exception:
                pass
            _sm = SkillsManager(DATA_DIR)
            _owner_skills = _sm.load(owner=owner) if _skills_on else []
            if _owner_skills:
                _relevant_tools.add("manage_skills")
                if _retrieval_query:
                    # Validate against every known executable tool, not just
                    # TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
                    # schemas without a prompt-prose section.
                    from src.tool_policy import known_tool_names
                    _known = known_tool_names()
                    for _sk in _sm.get_relevant_skills(
                        _retrieval_query, skills=_owner_skills,
                        threshold=0.25, max_items=3,
                    ):
                        _relevant_tools.update(
                            t for t in (_sk.get("requires_toolsets") or [])
                            if t in _known
                        )
        except Exception as _e:
            logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
    if _relevant_tools is not None:
        logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
@@ -2167,9 +2205,17 @@ async def stream_agent_loop(
        elif _is_api_model:
            # Filter schemas by RAG-selected tools (if available)
            if _relevant_tools:
                # _build_base_prompt unions _ADMIN_TOOLS into the prompt
                # sections when admin intent fires — the schema list must
                # offer the same names, or the model reads prose describing
                # tools it cannot call and substitutes the nearest schema
                # it does have (e.g. manage_memory for manage_skills).
                _schema_names = set(_relevant_tools)
                if _needs_admin:
                    _schema_names |= _ADMIN_TOOLS
                base_schemas = [
                    s for s in FUNCTION_TOOL_SCHEMAS
-                    if s.get("function", {}).get("name") in _relevant_tools
+                    if s.get("function", {}).get("name") in _schema_names
                ]
                _mcp_filtered = [
                    s for s in mcp_schemas
@@ -2705,6 +2751,46 @@ async def stream_agent_loop(
                    )
                desc, result = await _tool_task
            # A skill the model just loaded can prescribe tools that weren't
            # RAG-selected this turn (declared via requires_toolsets in its
            # frontmatter). Union them into the selection so the NEXT round's
            # schema list includes them — otherwise the model reads "use
            # grep" from the skill it fetched but has no grep schema to call.
            if (
                block.tool_type == "manage_skills"
                and _relevant_tools is not None
                and not result.get("error")
            ):
                _ms_args = {}
                _ms_raw = (block.content or "").strip()
                if _ms_raw.startswith("{"):
                    try:
                        _ms_args = json.loads(_ms_raw)
                    except json.JSONDecodeError:
                        _ms_args = {}
                _ms_name = str(_ms_args.get("name", "") or "").strip()
                if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
                    try:
                        from services.memory.skills import SkillsManager as _SkM
                        from src.constants import DATA_DIR as _DD
                        from src.tool_policy import known_tool_names as _ktn
                        _known = _ktn()
                        for _sk in _SkM(_DD).load(owner=owner):
                            if _sk.get("name") == _ms_name:
                                _new = {
                                    t for t in (_sk.get("requires_toolsets") or [])
                                    if t in _known and t not in _relevant_tools
                                }
                                if _new:
                                    _relevant_tools.update(_new)
                                    logger.info(
                                        "[tool-rag] skill '%s' unlocked tools for next round: %s",
                                        _ms_name, sorted(_new),
                                    )
                                break
                    except Exception as _e:
                        logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
            # Extract structured web sources from web_search tool output.
            # web_search returns {"output": ..., "exit_code": 0}; check "output"
            # first so the <!-- SOURCES:…--> marker is found and stripped even
@@ -0,0 +1,98 @@
 """index_for() toolset gating: requires_toolsets must only filter when the
 caller provides an explicit active-toolset list.
 Callers that don't know the active tool set (API skill listings, the chat
 preface) pass active_toolsets=None. The old behavior coerced None to [] and
 hid every skill that declared requires_toolsets — so a skill like a local
 notes lookup that needs grep + read_file silently vanished from the index
 the moment it declared its tool needs. None now means "don't gate".
 """
 import sys
 from pathlib import Path
 from unittest.mock import MagicMock
 # ── module-load stubbing (matches other tests in this repo) ──────────
 for _mod in ("sqlalchemy", "sqlalchemy.orm", "sqlalchemy.ext", "sqlalchemy.ext.declarative"):
    if _mod not in sys.modules:
        try:
            __import__(_mod)
        except ImportError:
            sys.modules[_mod] = MagicMock()
 from services.memory.skills import SkillsManager  # noqa: E402
 def _write_skill_md(skills_root: Path, name: str, *, requires: str = "",
                    fallback: str = "") -> Path:
    skill_dir = skills_root / "general" / name
    skill_dir.mkdir(parents=True, exist_ok=True)
    fm = [
        "---",
        f"name: {name}",
        "description: test skill",
        "version: 1.0.0",
        "category: general",
        "tags: []",
    ]
    if requires:
        fm.append(f"requires_toolsets: [{requires}]")
    if fallback:
        fm.append(f"fallback_for_toolsets: [{fallback}]")
    fm += [
        "status: published",
        "confidence: 0.9",
        "source: learned",
        "created: 2026-01-01T00:00:00Z",
        "---",
        "",
        "## When to Use",
        "- test",
        "",
        "## Procedure",
        "1. step 1",
        "",
    ]
    path = skill_dir / "SKILL.md"
    path.write_text("\n".join(fm), encoding="utf-8")
    return path
 def _names(idx):
    return {s["name"] for s in idx}
 def test_requires_toolsets_not_gated_when_active_set_unknown(tmp_path):
    (tmp_path / "skills").mkdir()
    _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
    sm = SkillsManager(str(tmp_path))
    # None = caller doesn't know the active tool set → no gating.
    assert "notes-lookup" in _names(sm.index_for())
    assert "notes-lookup" in _names(sm.index_for(active_toolsets=None))
 def test_requires_toolsets_gates_on_explicit_list(tmp_path):
    (tmp_path / "skills").mkdir()
    _write_skill_md(tmp_path / "skills", "notes-lookup", requires="grep, read_file")
    sm = SkillsManager(str(tmp_path))
    # Explicit list missing a required tool → hidden.
    assert "notes-lookup" not in _names(sm.index_for(active_toolsets=["grep"]))
    assert "notes-lookup" not in _names(sm.index_for(active_toolsets=[]))
    # All required tools active → visible.
    assert "notes-lookup" in _names(
        sm.index_for(active_toolsets=["grep", "read_file", "ls"]))
 def test_fallback_for_toolsets_unaffected_by_none(tmp_path):
    (tmp_path / "skills").mkdir()
    _write_skill_md(tmp_path / "skills", "web-fallback", fallback="web_search")
    sm = SkillsManager(str(tmp_path))
    # Fallback skills hide only when the toolset they substitute for is
    # known to be active.
    assert "web-fallback" in _names(sm.index_for(active_toolsets=None))
    assert "web-fallback" in _names(sm.index_for(active_toolsets=[]))
    assert "web-fallback" not in _names(
        sm.index_for(active_toolsets=["web_search"]))