mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-15 17:25:26 -04:00
0aba00f4cf
Commit e6b1009 removed the workspace feature's entry point (deleted
routes/workspace_routes.py + static/js/workspace.js and dropped the
workspace-param parsing in chat_routes), but left the downstream backend
plumbing dangling: chat_routes passed a hardcoded workspace=None into
stream_agent_loop, which forwarded it to execute_tool_block, so the
workspace value was permanently None and every workspace-gated branch
was unreachable.
Remove the now-dead code (no behavior change, since workspace was always
None):
- src/tool_execution.py: drop _resolve_tool_path_in_workspace and the
workspace params/branches on execute_tool_block, _direct_fallback,
_call_mcp_tool, _do_edit_file, and _resolve_search_root; restore the
bash/python/bg cwd to _AGENT_WORKDIR.
- src/agent_loop.py: drop the workspace param on stream_agent_loop, the
dead 'ACTIVE WORKSPACE' system-prompt block, and the workspace forward.
- routes/chat_routes.py: drop the hardcoded workspace=None arg and var.
- tests: delete test_workspace_confine.py (tested the removed feature) and
the workspace assertion in test_tool_policy.py.
Full suite: 2903 passed, 1 skipped.
331 lines
11 KiB
Python
331 lines
11 KiB
Python
import asyncio
|
|
import json
|
|
import sys
|
|
from types import SimpleNamespace
|
|
|
|
import src.agent_loop as al
|
|
from src.agent_tools import ToolBlock
|
|
from src.tool_execution import execute_tool_block
|
|
from src.tool_policy import build_effective_tool_policy, detect_guide_only_turn
|
|
|
|
|
|
def _collect(gen):
|
|
async def _run():
|
|
return [c async for c in gen]
|
|
|
|
return asyncio.run(_run())
|
|
|
|
|
|
def _events(chunks):
|
|
out = []
|
|
for chunk in chunks:
|
|
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
|
try:
|
|
out.append(json.loads(chunk[6:]))
|
|
except Exception:
|
|
pass
|
|
return out
|
|
|
|
|
|
def _delta_chunk(text):
|
|
return "data: " + json.dumps({"delta": text}) + "\n\n"
|
|
|
|
|
|
def _patch_loop_basics(monkeypatch):
    """Replace three helpers on the agent-loop module with fixed stubs.

    * ``get_setting`` returns whatever default the caller supplied.
    * ``get_mcp_manager`` returns ``None``.
    * ``estimate_tokens`` always returns ``10``.

    ``raising=False`` lets the patch apply even if the attribute is absent.
    """
    stubs = {
        "get_setting": lambda key, default=None: default,
        "get_mcp_manager": lambda: None,
        "estimate_tokens": lambda *a, **k: 10,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(al, attr_name, stub, raising=False)
|
|
|
|
|
|
def test_detects_strong_guide_only_turns():
    """Explicit no-tools instructions must yield a truthy detection result."""
    strong_phrasings = (
        "GUIDE-ONLY MODE. DO NOT USE TOOLS.",
        "NO-TOOLS MODE.",
        "Ask me before using tools.",
        "You are not allowed to:\n- use tools\n- execute commands",
    )
    for message in strong_phrasings:
        assert detect_guide_only_turn(message)
|
|
|
|
|
|
def test_does_not_treat_ordinary_guidance_as_no_tools():
    """Messages that merely mention guidance, tools or commands return None."""
    ordinary_messages = (
        "Can you guide me through fixing this bug?",
        "I have no tools installed in this project.",
        "Write the script in the repo; I'll run it locally.",
        "Do not run commands that write files; inspect the repo first.",
        "Don't execute shell commands unless I approve them.",
    )
    for message in ordinary_messages:
        assert detect_guide_only_turn(message) is None
|
|
|
|
|
|
def test_guide_only_policy_blocks_and_hides_tools():
    """A guide-only message yields a policy that disables, hides and blocks tools.

    The sampled tools include ``web_search`` (already disabled by the caller)
    as well as tools that were not in the caller's disabled set.
    """
    user_text = "GUIDE-ONLY MODE. DO NOT USE TOOLS."
    policy = build_effective_tool_policy(
        disabled_tools={"web_search"},
        last_user_message=user_text,
    )

    assert policy.mode == "guide_only"
    assert policy.disable_mcp is True
    assert policy.block_all_tool_calls is True

    sampled_tools = ["bash", "python", "web_search", "read_file"]
    for tool_name in sampled_tools:
        assert tool_name in policy.disabled_tools
        assert tool_name in policy.hidden_tools
        assert policy.blocks(tool_name)
|
|
|
|
|
|
def test_normal_policy_preserves_existing_disabled_tools():
    """An ordinary message keeps mode "normal" and only the caller's disabled tools blocked."""
    already_disabled = {"web_search"}
    policy = build_effective_tool_policy(
        disabled_tools=already_disabled,
        last_user_message="Please check this normally.",
    )

    assert policy.mode == "normal"
    # The pre-existing restriction still applies...
    assert policy.blocks("web_search")
    # ...but nothing else gets blocked as a side effect.
    assert not policy.blocks("bash")
|
|
|
|
|
|
def test_executor_policy_backstop_blocks_tools():
    """``execute_tool_block`` itself refuses a tool call under a blocking policy.

    The call must come back with the description "bash: BLOCKED", exit code 1
    and an error text containing "forbade".
    """
    policy = build_effective_tool_policy(last_user_message="Do not use tools.")
    bash_block = ToolBlock("bash", "echo should-not-run")

    outcome = asyncio.run(execute_tool_block(bash_block, tool_policy=policy))
    desc, result = outcome

    assert desc == "bash: BLOCKED"
    assert result["exit_code"] == 1
    assert "forbade" in result["error"]
|
|
|
|
|
|
def test_agent_loop_blocks_guide_only_fenced_tool_before_start(monkeypatch):
    """A fenced bash block streamed under a guide-only policy never executes.

    The fake model emits a ```bash fence. The test expects that the patched
    executor is never invoked, that no ``tool_start`` event is emitted, and
    that the first ``tool_output`` event reports tool "bash" with exit code 1.
    """
    _patch_loop_basics(monkeypatch)
    exec_calls = []

    async def _fake_exec(*args, **kwargs):
        # Record the invocation; the test asserts this list stays empty.
        exec_calls.append((args, kwargs))
        return ("bash", {"output": "ran", "exit_code": 0})

    async def _fake_stream(_candidates, messages, **kwargs):
        yield _delta_chunk("```bash\necho should-not-run\n```")
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "GUIDE-ONLY MODE. DO NOT USE TOOLS."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools={"bash"},
        tool_policy=policy,
    )
    events = _events(_collect(loop))

    assert exec_calls == []
    event_types = [event.get("type") for event in events]
    assert "tool_start" not in event_types

    blocked = [event for event in events if event.get("type") == "tool_output"]
    assert blocked
    first_blocked = blocked[0]
    assert first_blocked["tool"] == "bash"
    assert first_blocked["exit_code"] == 1
|
|
|
|
|
|
def test_guide_only_hides_api_function_schemas(monkeypatch):
    """Under a no-tools policy the stream call receives no ``tools`` value.

    The fake stream records the ``tools`` keyword of every call; exactly one
    call is expected and its ``tools`` must be ``None`` (or absent).
    """
    _patch_loop_basics(monkeypatch)
    tools_per_call = []

    async def _fake_stream(_candidates, messages, **kwargs):
        tools_per_call.append(kwargs.get("tools"))
        yield _delta_chunk("ok")
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "https://api.openai.com/v1",
        "gpt-test",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools={"bash", "web_search"},
        tool_policy=policy,
    )
    _collect(loop)

    assert tools_per_call == [None]
|
|
|
|
|
|
def test_guide_only_skips_tool_retrieval(monkeypatch):
    """With ``relevant_tools=None`` a no-tools policy must not touch the tool index.

    ``src.tool_index`` is swapped in ``sys.modules`` for a stand-in whose
    ``get_tool_index`` raises, so any lookup attempt fails the test. The
    stream call must also receive no ``tools`` value.
    """
    _patch_loop_basics(monkeypatch)
    tools_per_call = []

    async def _fake_stream(_candidates, messages, **kwargs):
        tools_per_call.append(kwargs.get("tools"))
        yield _delta_chunk("ok")
        yield "data: [DONE]\n\n"

    def _fail_tool_index():
        raise AssertionError("guide-only mode must not retrieve tool candidates")

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
    tripwire_module = SimpleNamespace(
        get_tool_index=_fail_tool_index,
        ALWAYS_AVAILABLE=set(),
    )
    monkeypatch.setitem(sys.modules, "src.tool_index", tripwire_module)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "https://api.openai.com/v1",
        "gpt-test",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools=None,
        tool_policy=policy,
    )
    _collect(loop)

    assert tools_per_call == [None]
|
|
|
|
|
|
def test_guide_only_blocks_document_prestream(monkeypatch):
    """A fenced ``create_document`` block is blocked rather than streamed.

    Expected events: no ``doc_stream_open``, no ``tool_start``, and at least
    one ``tool_output`` event whose tool is ``create_document``.
    """
    _patch_loop_basics(monkeypatch)

    async def _fake_stream(_candidates, messages, **kwargs):
        yield _delta_chunk("```create_document\nTitle\nmd\nBody\n```")
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools={"create_document"},
        tool_policy=policy,
    )
    events = _events(_collect(loop))

    event_types = [event.get("type") for event in events]
    assert "doc_stream_open" not in event_types
    assert "tool_start" not in event_types

    blocked_doc_outputs = [
        event
        for event in events
        if event.get("type") == "tool_output" and event.get("tool") == "create_document"
    ]
    assert blocked_doc_outputs
|
|
|
|
|
|
def test_guide_only_blocks_later_round_document_streaming(monkeypatch):
    """Document streaming stays suppressed in a second round as well.

    Round one of the fake model emits a bash fence, round two a
    ``create_document`` fence. The loop must call the model twice and emit
    neither ``doc_stream_open`` nor ``doc_stream_delta`` events.
    """
    _patch_loop_basics(monkeypatch)
    seen_rounds = []

    async def _fake_stream(_candidates, messages, **kwargs):
        seen_rounds.append(len(seen_rounds) + 1)
        if seen_rounds[-1] == 1:
            fenced = "```bash\necho blocked\n```"
        else:
            fenced = "```create_document\nTitle\nmd\nBody\n```"
        yield _delta_chunk(fenced)
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=2,
        relevant_tools={"bash", "create_document"},
        tool_policy=policy,
    )
    events = _events(_collect(loop))

    assert len(seen_rounds) == 2
    event_types = [event.get("type") for event in events]
    assert "doc_stream_open" not in event_types
    assert "doc_stream_delta" not in event_types
|
|
|
|
|
|
def test_guide_only_skips_intent_without_action_nudge(monkeypatch):
    """A reply that states intent but calls no tool produces no ``agent_step`` event.

    NOTE(review): the test name suggests ``agent_step`` is how the loop
    surfaces its "intent without action" nudge; confirm against the agent loop.
    """
    _patch_loop_basics(monkeypatch)

    async def _fake_stream(_candidates, messages, **kwargs):
        yield _delta_chunk("I will check the logs.")
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=2,
        relevant_tools={"bash"},
        tool_policy=policy,
    )
    events = _events(_collect(loop))

    event_types = [event.get("type") for event in events]
    assert "agent_step" not in event_types
|
|
|
|
|
|
def test_guide_only_suppresses_active_document_context(monkeypatch):
    """The active document must not leak into the prompt under a no-tools policy.

    The fake stream joins the ``content`` of every message it receives. The
    first captured prompt must contain neither the document body, nor the
    text "ACTIVE DOCUMENT", nor the text "Relevant skills".
    """
    _patch_loop_basics(monkeypatch)
    prompt_payloads = []

    async def _fake_stream(_candidates, messages, **kwargs):
        contents = [str(msg.get("content", "")) for msg in messages]
        prompt_payloads.append("\n\n".join(contents))
        yield _delta_chunk("ok")
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    active_doc = SimpleNamespace(
        id="doc-1",
        current_content="SECRET ACTIVE DOCUMENT CONTENT",
        title="Secret Doc",
        language="markdown",
    )
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools={"edit_document"},
        tool_policy=policy,
        active_document=active_doc,
    )
    _collect(loop)

    assert prompt_payloads
    first_prompt = prompt_payloads[0]
    forbidden_markers = (
        "SECRET ACTIVE DOCUMENT CONTENT",
        "ACTIVE DOCUMENT",
        "Relevant skills",
    )
    for marker in forbidden_markers:
        assert marker not in first_prompt
|
|
|
|
|
|
def test_guide_only_skips_teacher_escalation(monkeypatch):
    """Teacher escalation must not be consumed under a no-tools policy.

    ``src.teacher_escalation`` is swapped in ``sys.modules`` for a stand-in
    whose ``run_teacher_inline`` raises as soon as it is iterated. The model's
    own reply must still appear in the streamed chunks.
    """
    _patch_loop_basics(monkeypatch)

    async def _fake_stream(_candidates, messages, **kwargs):
        yield _delta_chunk("Could you tell me what output you see?")
        yield "data: [DONE]\n\n"

    async def _fail_teacher(*_args, **_kwargs):
        raise AssertionError("teacher escalation must not run in guide-only mode")
        # Unreachable, but the ``yield`` makes this an async generator
        # function rather than a plain coroutine function.
        yield ""

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
    tripwire_module = SimpleNamespace(run_teacher_inline=_fail_teacher)
    monkeypatch.setitem(sys.modules, "src.teacher_escalation", tripwire_module)

    user_text = "Do not use tools."
    policy = build_effective_tool_policy(last_user_message=user_text)
    loop = al.stream_agent_loop(
        "http://local.test/v1",
        "local-model",
        [{"role": "user", "content": user_text}],
        max_rounds=1,
        relevant_tools={"bash"},
        tool_policy=policy,
    )
    chunks = _collect(loop)

    reply_chunks = [chunk for chunk in chunks if "Could you tell me" in chunk]
    assert reply_chunks
|