fix(tasks): offer shell/file tools to scheduled task agents by default (#4398)

The scheduled-task runner built the agent's tool set from RAG retrieval plus ASSISTANT_ALWAYS_AVAILABLE. Neither includes bash/python (nor the file tools), and no keyword hint force-includes them, so a task only saw the shell when the tool-embedding index happened to surface it. On hosts where that index is empty or degraded (e.g. a fresh Docker deploy), retrieval returns nothing and the task agent never receives bash/python — telling the user the shell is disabled even for an admin owner. Offer the shell/file group to task agents by default, mirroring the chat agent where these are on unless a privilege or global setting turns them off. The existing blocked_tools_for_owner() gate in stream_agent_loop still strips the whole group for non-admin multi-user owners and only admits it for admins and single-user (AUTH_ENABLED=false) deployments, so this changes what is offered, not who is allowed. A crew that defines an explicit enabled_tools allowlist still has its restriction honored. Also merge the operator's global disabled_tools setting into the scheduler's disabled set before composing relevant_tools and before entering the agent loop, matching what chat already does. Without it, the global tool-disable contract did not reach unattended scheduled tasks: an admin or AUTH_ENABLED=false task could still see and call shell/file tools the operator had turned off globally, since the prompt/schema/execution gates only enforce the disabled tools passed in.
2026-06-17 10:15:27 -04:00 · 2026-06-16 18:57:30 +05:30
parent a36b423a4e
commit dd20c2bc75
2 changed files with 201 additions and 8 deletions
@@ -19,6 +19,34 @@ def _utcnow() -> datetime:
    return datetime.now(timezone.utc).replace(tzinfo=None)


+# Shell/file tools a scheduled task's agent should be offered by default,
+# mirroring the chat agent (where these are on unless a privilege or global
+# setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
+# never include bash/python, so on a host with an empty/degraded tool-embedding
+# index a task could not run shell or Python even for an admin owner. Offering
+# them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
+# this whole group for non-admin multi-user owners, and only admits it for
+# admins and single-user (AUTH_ENABLED=false) deployments.
+TASK_DEFAULT_SHELL_TOOLS = frozenset({
+    "bash", "python", "read_file", "write_file", "edit_file",
+    "grep", "glob", "ls", "get_workspace",
+})
+
+
+def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
+    """Compose the relevant-tools set offered to a scheduled task's agent.
+
+    Unions the RAG-retrieved tools, the assistant's always-available set, and
+    the default shell/file group, then removes anything the task's crew
+    explicitly disabled via its `enabled_tools` allowlist. Per-owner admin
+    gating is applied later by stream_agent_loop (blocked_tools_for_owner).
+    """
+    tools = set(rag_tools) | set(assistant_always) | set(TASK_DEFAULT_SHELL_TOOLS)
+    if disabled_tools:
+        tools -= set(disabled_tools)
+    return tools
+
+
 # ── Shared TTL cache (singleflight) ────────────────────────────────────────
 # Multiple scheduled tasks firing in the same minute often need the same
 # external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -1391,17 +1419,30 @@ class TaskScheduler:
            time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
        system_prompt = f"Current time: {time_str}\n\n{system_prompt}"

-        # Compute tool filter from CrewMember.enabled_tools if set
-        disabled_tools = None
+        # Compute the disabled-tools set: the crew's enabled_tools allowlist
+        # (inverted) plus the operator's global disabled_tools setting. The
+        # global list must be merged here — chat does the same merge before
+        # entering the agent loop (routes/chat_routes.py) — otherwise an admin
+        # or AUTH_ENABLED=false scheduled task would still see and call shell/
+        # file tools after the operator disabled them globally, because the
+        # prompt/schema/execution gates only enforce what is passed in.
+        disabled_tools: set[str] = set()
        if crew and crew.enabled_tools:
            try:
                enabled = json.loads(crew.enabled_tools)
                if isinstance(enabled, list) and enabled:
                    from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
                    all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
-                    disabled_tools = all_tools - set(enabled)
+                    disabled_tools |= all_tools - set(enabled)
            except Exception:
                pass
+        try:
+            from src.settings import get_setting
+            _global_disabled = get_setting("disabled_tools", [])
+            if isinstance(_global_disabled, list):
+                disabled_tools.update(_global_disabled)
+        except Exception:
+            pass

        # RAG-select relevant tools for this prompt + always-available assistant tools.
        # Without this, all 40+ tools get sent and models hit their tool limit.
@@ -1411,10 +1452,10 @@ class TaskScheduler:
            tool_idx = get_tool_index()
            if tool_idx:
                rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
-                relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
-                if disabled_tools:
-                    relevant_tools -= disabled_tools
-                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
+                relevant_tools = compose_task_relevant_tools(
+                    rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
+                )
+                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
        except Exception as e:
            logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")

@@ -1422,7 +1463,7 @@ class TaskScheduler:
        try:
            result = await self._run_agent_loop(
                endpoint_url, model, task, session_id,
-                system_prompt=system_prompt, disabled_tools=disabled_tools,
+                system_prompt=system_prompt, disabled_tools=disabled_tools or None,
                relevant_tools=relevant_tools,
            )
        except Exception as e:
@@ -0,0 +1,152 @@
+"""Scheduled tasks must be offered shell/file tools by default.
+
+Regression for #4163: the task runner built `relevant_tools` from RAG output
+plus ASSISTANT_ALWAYS_AVAILABLE, neither of which includes bash/python. On a
+host with an empty/degraded tool-embedding index, RAG returns nothing, so a
+task agent never received the shell — even for an admin owner. The fix offers
+the shell/file group by default and lets stream_agent_loop's owner gate decide
+who actually keeps it.
+"""
+
+from types import SimpleNamespace
+
+from src.task_scheduler import (
+    TASK_DEFAULT_SHELL_TOOLS,
+    TaskScheduler,
+    compose_task_relevant_tools,
+)
+from src.tool_index import ASSISTANT_ALWAYS_AVAILABLE
+
+
+def test_assistant_always_available_lacks_shell():
+    # Pins the precondition that made the bug possible: the assistant set the
+    # task runner relied on does not contain the shell/Python tools.
+    assert "bash" not in ASSISTANT_ALWAYS_AVAILABLE
+    assert "python" not in ASSISTANT_ALWAYS_AVAILABLE
+
+
+def test_shell_offered_when_rag_returns_nothing():
+    # Degraded/empty embedding index -> rag_tools is empty (the #4163 case).
+    tools = compose_task_relevant_tools(set(), ASSISTANT_ALWAYS_AVAILABLE, None)
+    assert "bash" in tools
+    assert "python" in tools
+    assert TASK_DEFAULT_SHELL_TOOLS <= tools
+
+
+def test_assistant_and_rag_tools_preserved():
+    tools = compose_task_relevant_tools(
+        {"web_fetch"}, ASSISTANT_ALWAYS_AVAILABLE, None
+    )
+    assert "web_fetch" in tools          # RAG-selected tool kept
+    assert "manage_calendar" in tools    # assistant-always member kept
+    assert "bash" in tools               # shell default added
+
+
+def test_crew_allowlist_restriction_still_honored():
+    # A crew that defines enabled_tools yields a `disabled_tools` set
+    # (all_tools - enabled). Anything it disables must stay disabled, including
+    # the shell defaults — the task owner explicitly scoped the tools.
+    disabled = {"bash", "python", "edit_file"}
+    tools = compose_task_relevant_tools(set(), ASSISTANT_ALWAYS_AVAILABLE, disabled)
+    assert "bash" not in tools
+    assert "python" not in tools
+    assert "edit_file" not in tools
+    # Shell tools the crew did NOT disable remain available.
+    assert "read_file" in tools
+
+
+def test_offered_shell_maps_to_real_schemas_for_admin():
+    # End-to-end with the real schema list: the names we add are actual
+    # function schemas, so an admin/single-user task (nothing in disabled_tools)
+    # really does get bash/python offered to the model — not just named in prose.
+    from src.agent_loop import FUNCTION_TOOL_SCHEMAS
+
+    schema_names = {s["function"]["name"] for s in FUNCTION_TOOL_SCHEMAS}
+    offered = compose_task_relevant_tools(set(), ASSISTANT_ALWAYS_AVAILABLE, None)
+    admin_schemas = offered & schema_names  # mirrors agent_loop's relevant∩schemas
+    assert "bash" in admin_schemas
+    assert "python" in admin_schemas
+
+
+def test_non_admin_owner_block_strips_shell_end_to_end():
+    # Defense check: the runner now OFFERS shell tools, but stream_agent_loop
+    # subtracts blocked_tools_for_owner() (== NON_ADMIN_BLOCKED_TOOLS for a
+    # non-admin multi-user owner) from both the prompt and the schemas. Reusing
+    # that exact block set proves a non-admin task's model never sees the shell.
+    from src.agent_loop import FUNCTION_TOOL_SCHEMAS
+    from src.tool_security import NON_ADMIN_BLOCKED_TOOLS
+
+    schema_names = {s["function"]["name"] for s in FUNCTION_TOOL_SCHEMAS}
+    offered = compose_task_relevant_tools(set(), ASSISTANT_ALWAYS_AVAILABLE, None)
+    non_admin_schemas = (offered - set(NON_ADMIN_BLOCKED_TOOLS)) & schema_names
+    assert "bash" not in non_admin_schemas
+    assert "python" not in non_admin_schemas
+
+
+async def test_scheduled_task_honors_global_disabled_tools(monkeypatch):
+    # RaresKeY review on #4398: the runner offers the shell/file group by
+    # default, but the scheduled-task path only built disabled_tools from the
+    # crew allowlist — it never merged the operator's global disabled_tools
+    # setting. So an admin / AUTH_ENABLED=false task could still see and call
+    # bash/python after the operator turned them off globally, because the
+    # downstream prompt/schema/execution gates only enforce what is passed in.
+    #
+    # Drive the real _execute_llm_task and assert the global list reaches BOTH
+    # sides: it is stripped from relevant_tools AND passed into the agent loop.
+    global_off = ["bash", "python", "read_file"]
+
+    monkeypatch.setattr(
+        "src.settings.get_setting",
+        lambda key, default=None: list(global_off) if key == "disabled_tools" else default,
+    )
+
+    # Degraded-index stand-in that still returns one RAG hit, so we can prove
+    # non-disabled tools survive the merge.
+    class _FakeIndex:
+        def get_tools_for_query(self, query, k=8):
+            return {"web_fetch"}
+
+    monkeypatch.setattr("src.tool_index.get_tool_index", lambda: _FakeIndex())
+
+    captured = {}
+
+    async def _capture(endpoint_url, model, task, session_id, *,
+                       system_prompt=None, disabled_tools=None, relevant_tools=None):
+        captured["disabled_tools"] = disabled_tools
+        captured["relevant_tools"] = relevant_tools
+        return "done"
+
+    scheduler = TaskScheduler(session_manager=None)
+    scheduler._run_agent_loop = _capture
+
+    # No crew_member_id + a preset session/endpoint means the DB is never
+    # touched on this path, so a bare task object is enough to exercise it.
+    task = SimpleNamespace(
+        crew_member_id=None,
+        endpoint_url="http://endpoint",
+        model="util-model",
+        session_id="sess-1",
+        owner="admin",
+        prompt="back up the logs",
+        name="Nightly job",
+        max_steps=5,
+        character_id=None,
+    )
+
+    result = await scheduler._execute_llm_task(task, db=None)
+    assert result == "done"
+
+    # Enforcement side: the global list reached the agent loop, so the
+    # prompt/schema/execution gates will strip these even for an admin owner.
+    passed_disabled = captured["disabled_tools"]
+    assert passed_disabled is not None
+    assert set(global_off) <= set(passed_disabled)
+
+    # Offer side: globally-disabled tools are gone from relevant_tools, but the
+    # rest of the shell/file defaults and the RAG hit survive.
+    offered = captured["relevant_tools"]
+    assert "bash" not in offered
+    assert "python" not in offered
+    assert "read_file" not in offered
+    assert "edit_file" in offered   # shell default NOT globally disabled
+    assert "web_fetch" in offered   # RAG-selected tool preserved