fix(agent): enforce guide-only tool policy (#3088)

2026-06-24 05:35:31 -04:00 · 2026-06-06 18:48:24 -06:00
parent 108ee1e32b
commit a3cb15d0a1
9 changed files with 993 additions and 207 deletions
@@ -0,0 +1,54 @@
+import pytest
+from types import SimpleNamespace
+
+from src.chat_handler import ChatHandler
+
+
+class _UploadHandler:
+    """Upload-handler stand-in whose methods fail the test if they are ever called."""
+
+    def resolve_upload(self, *_args, **_kwargs):
+        # Any call means attachment resolution ran although it was disabled.
+        reason = "attachments must not be resolved when tool preprocessing is disabled"
+        raise AssertionError(reason)
+
+    def is_image_file(self, *_args, **_kwargs):
+        # Any call means image inspection ran although it was disabled.
+        reason = "images must not be inspected when tool preprocessing is disabled"
+        raise AssertionError(reason)
+
+
+@pytest.mark.asyncio
+async def test_preprocess_can_skip_external_context_and_attachment_work(monkeypatch):
+    """``allow_tool_preprocessing=False`` must leave the message untouched.
+
+    Each helper patched below raises ``AssertionError`` when invoked, so the
+    test fails as soon as preprocessing asks for a transcript, comments, a
+    vision probe, or anything from the upload handler.
+    """
+    async def _fail_transcript(*_args, **_kwargs):
+        raise AssertionError("YouTube transcripts must not be fetched")
+
+    async def _fail_comments(*_args, **_kwargs):
+        raise AssertionError("YouTube comments must not be fetched")
+
+    def _fail_vision(*_args, **_kwargs):
+        raise AssertionError("vision support must not be probed")
+
+    tripwires = {
+        "src.chat_handler.extract_transcript_async": _fail_transcript,
+        "src.chat_handler.fetch_youtube_comments": _fail_comments,
+        "src.chat_handler.model_supports_vision": _fail_vision,
+    }
+    for target, stub in tripwires.items():
+        monkeypatch.setattr(target, stub)
+
+    # Only the upload handler is a real object; the other collaborators are None.
+    unused = dict.fromkeys(
+        (
+            "session_manager",
+            "memory_manager",
+            "chat_processor",
+            "research_handler",
+            "preset_manager",
+        )
+    )
+    handler = ChatHandler(upload_handler=_UploadHandler(), **unused)
+    session = SimpleNamespace(model="text-only", endpoint_url="", owner="user", id="session")
+
+    outcome = await handler.preprocess_message(
+        "Do not use tools. https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+        ["image-id"],
+        session,
+        auto_opened_docs=[],
+        allow_tool_preprocessing=False,
+    )
+    enhanced, user_content, text_ctx, youtube, attachment_meta = outcome
+
+    assert enhanced.startswith("Do not use tools.")
+    # Both derived views must equal the (unmodified) enhanced message.
+    assert user_content == enhanced
+    assert text_ctx == enhanced
+    assert youtube == []
+    assert attachment_meta == []
@@ -0,0 +1,50 @@
+from pathlib import Path
+
+
+CHAT_ROUTES = Path(__file__).resolve().parents[1] / "routes" / "chat_routes.py"
+
+
+def _source() -> str:
+    """Return the text of the file at ``CHAT_ROUTES``, decoded as UTF-8."""
+    with CHAT_ROUTES.open(encoding="utf-8") as handle:
+        return handle.read()
+
+
+def test_research_fast_path_respects_tool_policy():
+    """Every policy-related snippet listed below must appear in the chat routes source."""
+    required_snippets = (
+        "pre_context_tool_policy = build_effective_tool_policy(",
+        "allow_tool_preprocessing = not pre_context_tool_policy.block_all_tool_calls",
+        "allow_tool_preprocessing=allow_tool_preprocessing",
+        "research_blocked_by_policy = bool(",
+        'tool_policy.blocks("trigger_research")',
+        'tool_policy.blocks("manage_research")',
+        'effective_do_research = bool(',
+        'if effective_do_research:',
+        '"is_research": effective_do_research',
+        "_effective_mode = 'research' if effective_do_research else (chat_mode or 'chat')",
+        '_model_suffix = "Research" if effective_do_research else None',
+        "do_research=effective_do_research",
+    )
+    routes_text = _source()
+    for snippet in required_snippets:
+        assert snippet in routes_text
+
+
+def test_non_streaming_chat_path_uses_tool_policy_before_context_and_research():
+    """The ``chat_endpoint`` section of the routes source must contain each policy snippet.
+
+    The section runs from ``async def chat_endpoint`` up to the next dashed
+    separator comment that follows it.
+    """
+    routes_text = _source()
+    begin = routes_text.index("async def chat_endpoint")
+    finish = routes_text.index(
+        "# ------------------------------------------------------------------ #",
+        begin,
+    )
+    chat_endpoint = routes_text[begin:finish]
+
+    required_snippets = (
+        "tool_policy = build_effective_tool_policy(last_user_message=message)",
+        "allow_tool_preprocessing = not tool_policy.block_all_tool_calls",
+        'if not tool_policy.blocks("manage_memory"):',
+        "allow_tool_preprocessing=allow_tool_preprocessing",
+        'tool_policy.blocks("trigger_research")',
+        "if use_research and not research_blocked_by_policy:",
+        "allow_background_extraction=not tool_policy.block_all_tool_calls",
+    )
+    for snippet in required_snippets:
+        assert snippet in chat_endpoint
+
+
+def test_image_generation_fast_path_checks_policy_before_tool_start():
+    """The ``generate_image`` policy check must occur earlier in the source than its use."""
+    routes_text = _source()
+    markers = (
+        'if tool_policy.blocks("generate_image"):',
+        '"type": "tool_start", "tool": "generate_image"',
+        "do_generate_image(",
+    )
+    # Locate the first occurrence of each marker before comparing positions.
+    gate_pos, start_pos, call_pos = (routes_text.index(marker) for marker in markers)
+    assert gate_pos < start_pos
+    assert gate_pos < call_pos
+
+
+def test_streaming_chat_paths_disable_background_extraction_under_policy():
+    """The background-extraction policy argument must appear at least three times."""
+    needle = "allow_background_extraction=not tool_policy.block_all_tool_calls"
+    occurrences = _source().count(needle)
+    assert occurrences >= 3
@@ -0,0 +1,360 @@
+import asyncio
+import json
+import sys
+from types import SimpleNamespace
+
+import src.agent_loop as al
+from src.agent_tools import ToolBlock
+from src.tool_execution import execute_tool_block
+from src.tool_policy import build_effective_tool_policy, detect_guide_only_turn
+
+
+def _collect(gen):
+    """Exhaust the async iterable *gen* via ``asyncio.run`` and return its items as a list."""
+    async def _drain():
+        items = []
+        async for item in gen:
+            items.append(item)
+        return items
+
+    return asyncio.run(_drain())
+
+
+def _events(chunks):
+    """Decode the JSON payloads of ``data: `` chunks into a list.
+
+    Chunks that do not start with ``data: `` and chunks that start with
+    ``data: [DONE]`` are ignored. A payload that is not valid JSON is skipped;
+    any other error propagates instead of being silently swallowed.
+    """
+    prefix = "data: "
+    out = []
+    for chunk in chunks:
+        if not chunk.startswith(prefix) or chunk.startswith("data: [DONE]"):
+            continue
+        try:
+            out.append(json.loads(chunk[len(prefix):]))
+        except json.JSONDecodeError:
+            # Best effort: tolerate malformed payloads, nothing else.
+            continue
+    return out
+
+
+def _delta_chunk(text):
+    """Build a ``data: `` chunk whose JSON payload is ``{"delta": text}``."""
+    payload = json.dumps({"delta": text})
+    return f"data: {payload}\n\n"
+
+
+def _patch_loop_basics(monkeypatch):
+    """Replace three ``al`` module attributes with fixed-value stubs.
+
+    ``get_setting`` returns whatever default it is given, ``get_mcp_manager``
+    returns ``None`` and ``estimate_tokens`` always returns 10. The attributes
+    are set with ``raising=False`` so a missing attribute is not an error.
+    """
+    def _default_setting(key, default=None):
+        return default
+
+    stubs = {
+        "get_setting": _default_setting,
+        "get_mcp_manager": lambda: None,
+        "estimate_tokens": lambda *a, **k: 10,
+    }
+    for attr_name, stub in stubs.items():
+        monkeypatch.setattr(al, attr_name, stub, raising=False)
+
+
+def test_detects_strong_guide_only_turns():
+    """Each message below must produce a truthy ``detect_guide_only_turn`` result."""
+    guide_only_messages = (
+        "GUIDE-ONLY MODE. DO NOT USE TOOLS.",
+        "NO-TOOLS MODE.",
+        "Ask me before using tools.",
+        "You are not allowed to:\n- use tools\n- execute commands",
+    )
+    for message in guide_only_messages:
+        assert detect_guide_only_turn(message)
+
+
+def test_does_not_treat_ordinary_guidance_as_no_tools():
+    """Each message below must make ``detect_guide_only_turn`` return ``None``."""
+    ordinary_messages = (
+        "Can you guide me through fixing this bug?",
+        "I have no tools installed in this project.",
+        "Write the script in the repo; I'll run it locally.",
+        "Do not run commands that write files; inspect the repo first.",
+        "Don't execute shell commands unless I approve them.",
+    )
+    for message in ordinary_messages:
+        assert detect_guide_only_turn(message) is None
+
+
+def test_guide_only_policy_blocks_and_hides_tools():
+    """A guide-only message yields a policy that disables, hides and blocks the sampled tools."""
+    guide_only_message = "GUIDE-ONLY MODE. DO NOT USE TOOLS."
+    policy = build_effective_tool_policy(
+        disabled_tools={"web_search"},
+        last_user_message=guide_only_message,
+    )
+
+    assert policy.mode == "guide_only"
+    assert policy.disable_mcp is True
+    assert policy.block_all_tool_calls is True
+
+    sampled_tools = ("bash", "python", "web_search", "read_file")
+    for tool_name in sampled_tools:
+        assert tool_name in policy.disabled_tools
+        assert tool_name in policy.hidden_tools
+        assert policy.blocks(tool_name)
+
+
+def test_normal_policy_preserves_existing_disabled_tools():
+    """An ordinary message keeps ``normal`` mode: ``web_search`` stays blocked, ``bash`` does not."""
+    already_disabled = {"web_search"}
+    policy = build_effective_tool_policy(
+        disabled_tools=already_disabled,
+        last_user_message="Please check this normally.",
+    )
+
+    assert policy.mode == "normal"
+    assert policy.blocks("web_search")
+    bash_blocked = policy.blocks("bash")
+    assert not bash_blocked
+
+
+def test_executor_policy_backstop_blocks_tools():
+    """``execute_tool_block`` must report a blocked bash call under a no-tools policy."""
+    policy = build_effective_tool_policy(last_user_message="Do not use tools.")
+    bash_block = ToolBlock("bash", "echo should-not-run")
+
+    desc, result = asyncio.run(execute_tool_block(bash_block, tool_policy=policy))
+
+    assert desc == "bash: BLOCKED"
+    assert result["exit_code"] == 1
+    assert "forbade" in result["error"]
+
+
+def test_agent_loop_blocks_guide_only_fenced_tool_before_start(monkeypatch):
+    """A fenced bash block emitted under guide-only policy is blocked, not executed.
+
+    The executor stub must never be called, no ``tool_start`` event may be
+    emitted, and the first ``tool_output`` event must be for ``bash`` with
+    exit code 1.
+    """
+    _patch_loop_basics(monkeypatch)
+    exec_calls = []
+
+    async def _fake_exec(*args, **kwargs):
+        exec_calls.append((args, kwargs))
+        return ("bash", {"output": "ran", "exit_code": 0})
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield _delta_chunk("```bash\necho should-not-run\n```")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+
+    user_text = "GUIDE-ONLY MODE. DO NOT USE TOOLS."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"bash"},
+        tool_policy=policy,
+    )
+    events = _events(_collect(loop))
+
+    assert not exec_calls
+    event_types = [event.get("type") for event in events]
+    assert "tool_start" not in event_types
+    blocked = [event for event in events if event.get("type") == "tool_output"]
+    assert blocked
+    first_blocked = blocked[0]
+    assert first_blocked["tool"] == "bash"
+    assert first_blocked["exit_code"] == 1
+
+
+def test_guide_only_hides_api_function_schemas(monkeypatch):
+    """Under a no-tools policy the single LLM call must receive no ``tools`` keyword value."""
+    _patch_loop_basics(monkeypatch)
+    tools_per_call = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        # Record the ``tools`` keyword of each call (None when absent).
+        tools_per_call.append(kwargs.get("tools"))
+        yield _delta_chunk("ok")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+
+    loop = al.stream_agent_loop(
+        "https://api.openai.com/v1",
+        "gpt-test",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"bash", "web_search"},
+        tool_policy=policy,
+    )
+    _collect(loop)
+
+    assert tools_per_call == [None]
+
+
+def test_guide_only_skips_tool_retrieval(monkeypatch):
+    """With ``relevant_tools=None`` and a no-tools policy, the tool index must not be used.
+
+    ``src.tool_index`` is replaced in ``sys.modules`` by a namespace whose
+    ``get_tool_index`` raises, and the single LLM call must receive no tools.
+    """
+    _patch_loop_basics(monkeypatch)
+    tools_per_call = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        tools_per_call.append(kwargs.get("tools"))
+        yield _delta_chunk("ok")
+        yield "data: [DONE]\n\n"
+
+    def _fail_tool_index():
+        raise AssertionError("guide-only mode must not retrieve tool candidates")
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    tripwire_module = SimpleNamespace(get_tool_index=_fail_tool_index, ALWAYS_AVAILABLE=set())
+    monkeypatch.setitem(sys.modules, "src.tool_index", tripwire_module)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+
+    loop = al.stream_agent_loop(
+        "https://api.openai.com/v1",
+        "gpt-test",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools=None,
+        tool_policy=policy,
+    )
+    _collect(loop)
+
+    assert tools_per_call == [None]
+
+
+def test_guide_only_blocks_document_prestream(monkeypatch):
+    """A fenced ``create_document`` block under a no-tools policy must not open a doc stream.
+
+    No ``doc_stream_open`` or ``tool_start`` event may appear, and there must
+    be a ``tool_output`` event for ``create_document``.
+    """
+    _patch_loop_basics(monkeypatch)
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield _delta_chunk("```create_document\nTitle\nmd\nBody\n```")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"create_document"},
+        tool_policy=policy,
+    )
+    events = _events(_collect(loop))
+
+    event_types = [event.get("type") for event in events]
+    assert "doc_stream_open" not in event_types
+    assert "tool_start" not in event_types
+    blocked_docs = [
+        event
+        for event in events
+        if event.get("type") == "tool_output" and event.get("tool") == "create_document"
+    ]
+    assert blocked_docs
+
+
+def test_guide_only_blocks_later_round_document_streaming(monkeypatch):
+    """Document streaming stays blocked on the second round of a no-tools turn.
+
+    The stub emits a bash block on its first call and a ``create_document``
+    block afterwards; it must be called exactly twice and no
+    ``doc_stream_open`` / ``doc_stream_delta`` event may appear.
+    """
+    _patch_loop_basics(monkeypatch)
+    round_log = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        round_log.append(len(round_log) + 1)
+        is_first_round = round_log[-1] == 1
+        if is_first_round:
+            yield _delta_chunk("```bash\necho blocked\n```")
+        else:
+            yield _delta_chunk("```create_document\nTitle\nmd\nBody\n```")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=2,
+        relevant_tools={"bash", "create_document"},
+        tool_policy=policy,
+    )
+    events = _events(_collect(loop))
+
+    assert len(round_log) == 2
+    event_types = [event.get("type") for event in events]
+    assert "doc_stream_open" not in event_types
+    assert "doc_stream_delta" not in event_types
+
+
+def test_guide_only_directive_dominates_workspace_prompt(monkeypatch):
+    """With a workspace set, the first message must still lead with the guide-only header.
+
+    The content of ``messages[0]`` must start with ``## GUIDE-ONLY MODE`` and
+    must not contain the workspace phrases checked below.
+    """
+    _patch_loop_basics(monkeypatch)
+    first_messages = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        first_messages.append(messages[0]["content"])
+        yield _delta_chunk("ok")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"bash"},
+        tool_policy=policy,
+        workspace="/tmp/project",
+    )
+    _collect(loop)
+
+    assert first_messages
+    leading_prompt = first_messages[0]
+    assert leading_prompt.startswith("## GUIDE-ONLY MODE")
+    for forbidden in ("ACTIVE WORKSPACE", "ALWAYS start by exploring"):
+        assert forbidden not in leading_prompt
+
+
+def test_guide_only_skips_intent_without_action_nudge(monkeypatch):
+    """A reply that only states an intent must not produce an ``agent_step`` event under a no-tools policy."""
+    _patch_loop_basics(monkeypatch)
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield _delta_chunk("I will check the logs.")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=2,
+        relevant_tools={"bash"},
+        tool_policy=policy,
+    )
+    events = _events(_collect(loop))
+
+    event_types = [event.get("type") for event in events]
+    assert "agent_step" not in event_types
+
+
+def test_guide_only_suppresses_active_document_context(monkeypatch):
+    """An active document's content must not appear in the messages under a no-tools policy.
+
+    All message contents of the first LLM call are joined into one string,
+    which must contain none of the markers checked below.
+    """
+    _patch_loop_basics(monkeypatch)
+    joined_prompts = []
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        contents = [str(msg.get("content", "")) for msg in messages]
+        joined_prompts.append("\n\n".join(contents))
+        yield _delta_chunk("ok")
+        yield "data: [DONE]\n\n"
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+    active_doc = SimpleNamespace(
+        id="doc-1",
+        current_content="SECRET ACTIVE DOCUMENT CONTENT",
+        title="Secret Doc",
+        language="markdown",
+    )
+
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"edit_document"},
+        tool_policy=policy,
+        active_document=active_doc,
+    )
+    _collect(loop)
+
+    assert joined_prompts
+    first_payload = joined_prompts[0]
+    forbidden_markers = (
+        "SECRET ACTIVE DOCUMENT CONTENT",
+        "ACTIVE DOCUMENT",
+        "Relevant skills",
+    )
+    for marker in forbidden_markers:
+        assert marker not in first_payload
+
+
+def test_guide_only_skips_teacher_escalation(monkeypatch):
+    """Under a no-tools policy the model's reply is streamed and the teacher stub never fires.
+
+    ``src.teacher_escalation`` is replaced in ``sys.modules`` by a namespace
+    whose ``run_teacher_inline`` raises as soon as it is iterated.
+    """
+    _patch_loop_basics(monkeypatch)
+
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield _delta_chunk("Could you tell me what output you see?")
+        yield "data: [DONE]\n\n"
+
+    async def _fail_teacher(*_args, **_kwargs):
+        raise AssertionError("teacher escalation must not run in guide-only mode")
+        # Unreachable, but its presence makes this an async generator function.
+        yield ""
+
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+    tripwire_module = SimpleNamespace(run_teacher_inline=_fail_teacher)
+    monkeypatch.setitem(sys.modules, "src.teacher_escalation", tripwire_module)
+    user_text = "Do not use tools."
+    policy = build_effective_tool_policy(last_user_message=user_text)
+
+    loop = al.stream_agent_loop(
+        "http://local.test/v1",
+        "local-model",
+        [{"role": "user", "content": user_text}],
+        max_rounds=1,
+        relevant_tools={"bash"},
+        tool_policy=policy,
+    )
+    chunks = _collect(loop)
+
+    echoed = [chunk for chunk in chunks if "Could you tell me" in chunk]
+    assert echoed