mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 15:45:22 -04:00
Merge pull request #4280 from GeekLuffy/feat/llm-self-eval
feat(teacher): implement Tier 2 LLM self-evaluation
This commit is contained in:
@@ -152,6 +152,7 @@ DEFAULT_SETTINGS = {
|
|||||||
"utility_model_fallbacks": [],
|
"utility_model_fallbacks": [],
|
||||||
"teacher_model": "",
|
"teacher_model": "",
|
||||||
"teacher_enabled": False,
|
"teacher_enabled": False,
|
||||||
|
"teacher_tier2_enabled": False,
|
||||||
# Skills: minimum self-reported confidence for an auto-written (LLM-authored)
|
# Skills: minimum self-reported confidence for an auto-written (LLM-authored)
|
||||||
# DRAFT skill to be injected into the agent prompt. Published skills always
|
# DRAFT skill to be injected into the agent prompt. Published skills always
|
||||||
# qualify. Keeps low-confidence auto-skills out of context until they're
|
# qualify. Keeps low-confidence auto-skills out of context until they're
|
||||||
|
|||||||
+103
-8
@@ -366,6 +366,71 @@ def _format_trace(tool_results: List[Dict[str, Any]], agent_reply: str) -> str:
|
|||||||
return f"<<<UNTRUSTED_TRACE>>>\n{trace}\n<<<END_UNTRUSTED_TRACE>>>"
|
return f"<<<UNTRUSTED_TRACE>>>\n{trace}\n<<<END_UNTRUSTED_TRACE>>>"
|
||||||
|
|
||||||
|
|
||||||
|
_EVALUATE_TURN_LLM_PROMPT = """\
|
||||||
|
You are an independent auditor evaluating a student AI agent's turn.
|
||||||
|
Given the original request, the trace of tool calls and results, and the agent's final reply, determine whether the agent failed, gave up because it lacks the tools/capability/information, or encountered an error.
|
||||||
|
|
||||||
|
Respond with exactly one of these two words:
|
||||||
|
- "failure" if the agent failed, gave up, encountered an error, or asked the user for clarification/missing tools.
|
||||||
|
- "ok" if the agent successfully completed the task or is making correct progress.
|
||||||
|
|
||||||
|
ORIGINAL USER REQUEST:
|
||||||
|
{user_request}
|
||||||
|
|
||||||
|
AGENT TRACE:
|
||||||
|
{trace}
|
||||||
|
|
||||||
|
AGENT REPLY:
|
||||||
|
{agent_reply}
|
||||||
|
|
||||||
|
EVALUATION:"""
|
||||||
|
|
||||||
|
|
||||||
|
async def evaluate_turn_llm(
    user_request: str,
    tool_results: List[Dict[str, Any]],
    agent_reply: str,
    student_endpoint_url: str,
    owner: Optional[str] = None,
) -> Tuple[str, Optional[str]]:
    """Ask an LLM whether the agent's turn failed (Tier 2 self-evaluation).

    The endpoint is resolved with ``resolve_endpoint("utility", ...)``, passing
    ``student_endpoint_url`` as the fallback URL. The evaluation is best-effort:
    any problem degrades to an "ok" verdict instead of raising.

    Args:
        user_request: The original user request; a placeholder is used if empty.
        tool_results: Tool call records, rendered through ``_format_trace``.
        agent_reply: The agent's final reply; a placeholder is used if empty.
        student_endpoint_url: Fallback URL handed to the endpoint resolver.
        owner: Optional owner forwarded to the endpoint resolver.

    Returns:
        ``("failure", reason)`` when the model's reply is the single word
        "failure" (case-insensitive, ignoring surrounding whitespace, quotes,
        markdown emphasis and punctuation). ``("ok", None)`` in every other
        case: no endpoint or model resolved, empty reply, any other reply
        text, or an exception raised by the LLM call.
    """
    from src.endpoint_resolver import resolve_endpoint
    from src.llm_core import llm_call_async

    # Characters a model commonly wraps around a one-word verdict
    # ("failure.", "**Failure**", "`failure`"). They are stripped from both
    # ends only, so a sentence that merely contains the word never matches.
    verdict_wrapping = " \t\r\n\f\v'\"`*_.,;:!"

    # Resolve utility model (falls back to default model, then student_endpoint_url)
    url, model, headers = resolve_endpoint(
        "utility",
        fallback_url=student_endpoint_url,
        owner=owner,
    )
    if not url or not model:
        return ("ok", None)

    trace_str = _format_trace(tool_results, agent_reply)
    prompt = _EVALUATE_TURN_LLM_PROMPT.format(
        user_request=user_request or "(no user request)",
        trace=trace_str,
        agent_reply=agent_reply or "(no agent reply)",
    )

    try:
        response = await llm_call_async(
            url, model,
            [{"role": "user", "content": prompt}],
            headers=headers,
            timeout=20,
        )
        if response:
            verdict = response.strip(verdict_wrapping).lower()
            if verdict == "failure":
                return ("failure", f"LLM evaluation flagged failure: {response.strip()}")
    except Exception as e:
        # Deliberate best-effort: a broken evaluator must not trigger escalation.
        logger.warning("Tier 2 LLM self-eval failed: %s", e)

    return ("ok", None)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def escalate_and_learn(
|
async def escalate_and_learn(
|
||||||
user_request: str,
|
user_request: str,
|
||||||
tool_results: List[Dict[str, Any]],
|
tool_results: List[Dict[str, Any]],
|
||||||
@@ -459,13 +524,32 @@ def maybe_escalate(
|
|||||||
|
|
||||||
# Gate 3: regex eval — only escalate on detected failure.
|
# Gate 3: regex eval — only escalate on detected failure.
|
||||||
status, reason = evaluate_turn_regex(tool_results, agent_reply)
|
status, reason = evaluate_turn_regex(tool_results, agent_reply)
|
||||||
if status != "failure":
|
if status == "failure":
|
||||||
|
# Fire async — don't block the user's chat.
|
||||||
|
return asyncio.create_task(
|
||||||
|
escalate_and_learn(user_request, tool_results, agent_reply, reason or "", owner),
|
||||||
|
name="teacher_escalation",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Gate 4: Tier 2 LLM self-evaluation requires teacher_tier2_enabled
|
||||||
|
if not get_setting("teacher_tier2_enabled", False):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Fire async — don't block the user's chat.
|
# Tier 2: LLM self-evaluation background task
|
||||||
|
async def evaluate_and_maybe_escalate():
|
||||||
|
llm_status, llm_reason = await evaluate_turn_llm(
|
||||||
|
user_request=user_request,
|
||||||
|
tool_results=tool_results,
|
||||||
|
agent_reply=agent_reply,
|
||||||
|
student_endpoint_url=student_endpoint_url,
|
||||||
|
owner=owner,
|
||||||
|
)
|
||||||
|
if llm_status == "failure":
|
||||||
|
await escalate_and_learn(user_request, tool_results, agent_reply, llm_reason or "", owner)
|
||||||
|
|
||||||
return asyncio.create_task(
|
return asyncio.create_task(
|
||||||
escalate_and_learn(user_request, tool_results, agent_reply, reason or "", owner),
|
evaluate_and_maybe_escalate(),
|
||||||
name="teacher_escalation",
|
name="teacher_escalation_tier2",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -501,10 +585,6 @@ async def run_teacher_inline(
|
|||||||
except Exception:
|
except Exception:
|
||||||
return
|
return
|
||||||
|
|
||||||
status, reason = evaluate_turn_regex(student_tool_events, student_reply)
|
|
||||||
if status != "failure":
|
|
||||||
return
|
|
||||||
|
|
||||||
# Extract original user request — last user-role message
|
# Extract original user request — last user-role message
|
||||||
user_request = ""
|
user_request = ""
|
||||||
for m in reversed(student_messages):
|
for m in reversed(student_messages):
|
||||||
@@ -521,6 +601,21 @@ async def run_teacher_inline(
|
|||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
status, reason = evaluate_turn_regex(student_tool_events, student_reply)
|
||||||
|
if status != "failure":
|
||||||
|
# Tier 2: LLM self-evaluation check requires teacher_tier2_enabled
|
||||||
|
if not get_setting("teacher_tier2_enabled", False):
|
||||||
|
return
|
||||||
|
status, reason = await evaluate_turn_llm(
|
||||||
|
user_request=user_request,
|
||||||
|
tool_results=student_tool_events,
|
||||||
|
agent_reply=student_reply,
|
||||||
|
student_endpoint_url=student_endpoint_url,
|
||||||
|
owner=owner,
|
||||||
|
)
|
||||||
|
if status != "failure":
|
||||||
|
return
|
||||||
|
|
||||||
# Resolve teacher endpoint
|
# Resolve teacher endpoint
|
||||||
try:
|
try:
|
||||||
from src.ai_interaction import _resolve_model
|
from src.ai_interaction import _resolve_model
|
||||||
|
|||||||
@@ -0,0 +1,239 @@
|
|||||||
|
import asyncio
|
||||||
|
from types import SimpleNamespace
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import src.teacher_escalation as teacher_escalation
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_evaluate_turn_llm_ok(monkeypatch):
    """An "ok" reply yields ("ok", None) and the utility endpoint is used."""
    recorded = {}

    def stub_resolve_endpoint(prefix, fallback_url=None, owner=None):
        recorded.update(prefix=prefix, owner=owner)
        return "http://endpoint.local/v1", "utility-model", {}

    async def stub_llm_call_async(url, model, messages, **kwargs):
        recorded["called"] = True
        return "ok"

    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", stub_resolve_endpoint)
    monkeypatch.setattr("src.llm_core.llm_call_async", stub_llm_call_async)

    call_kwargs = dict(
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        student_endpoint_url="http://student.local/v1",
        owner="alice",
    )
    status, reason = await teacher_escalation.evaluate_turn_llm(**call_kwargs)

    assert status == "ok"
    assert reason is None
    # The resolver must be asked for the "utility" endpoint on behalf of the owner.
    assert recorded["prefix"] == "utility"
    assert recorded["owner"] == "alice"
    assert recorded["called"] is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_evaluate_turn_llm_failure(monkeypatch):
    """A padded, quoted, mixed-case "Failure" reply is reported as a failure."""

    def stub_resolve_endpoint(prefix, fallback_url=None, owner=None):
        return "http://endpoint.local/v1", "utility-model", {}

    async def stub_llm_call_async(url, model, messages, **kwargs):
        # Surrounding spaces and double quotes must be tolerated by the parser.
        return ' "Failure" '

    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", stub_resolve_endpoint)
    monkeypatch.setattr("src.llm_core.llm_call_async", stub_llm_call_async)

    call_kwargs = dict(
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        student_endpoint_url="http://student.local/v1",
        owner="alice",
    )
    status, reason = await teacher_escalation.evaluate_turn_llm(**call_kwargs)

    assert status == "failure"
    assert "LLM evaluation flagged failure" in reason
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_evaluate_turn_llm_contains_failure_but_not_exact_match(monkeypatch):
    """A sentence that merely contains the word "failure" is treated as ok."""

    def stub_resolve_endpoint(prefix, fallback_url=None, owner=None):
        return "http://endpoint.local/v1", "utility-model", {}

    async def stub_llm_call_async(url, model, messages, **kwargs):
        return "this agent execution is not a failure"

    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", stub_resolve_endpoint)
    monkeypatch.setattr("src.llm_core.llm_call_async", stub_llm_call_async)

    call_kwargs = dict(
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        student_endpoint_url="http://student.local/v1",
        owner="alice",
    )
    status, reason = await teacher_escalation.evaluate_turn_llm(**call_kwargs)

    assert status == "ok"
    assert reason is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_evaluate_turn_llm_exception_handling(monkeypatch):
    """An exception raised by the LLM call degrades gracefully to ("ok", None)."""

    def stub_resolve_endpoint(prefix, fallback_url=None, owner=None):
        return "http://endpoint.local/v1", "utility-model", {}

    async def stub_llm_call_async(url, model, messages, **kwargs):
        raise RuntimeError("model timeout")

    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", stub_resolve_endpoint)
    monkeypatch.setattr("src.llm_core.llm_call_async", stub_llm_call_async)

    call_kwargs = dict(
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        student_endpoint_url="http://student.local/v1",
        owner="alice",
    )
    # The error must be swallowed rather than propagated to the caller.
    status, reason = await teacher_escalation.evaluate_turn_llm(**call_kwargs)

    assert status == "ok"
    assert reason is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_maybe_escalate_triggers_tier2_background_task(monkeypatch):
    """With Tier 2 on and the regex check ok, an LLM failure verdict escalates."""
    settings = {
        "teacher_enabled": True,
        "teacher_model": "teacher-model",
        "teacher_tier2_enabled": True,
    }
    monkeypatch.setattr(
        "src.settings.get_setting",
        lambda key, default=None: settings.get(key, default),
    )

    # The regex tier reports no failure, so only Tier 2 can trigger escalation.
    monkeypatch.setattr(
        "src.teacher_escalation.evaluate_turn_regex",
        lambda *args: ("ok", None),
    )

    llm_eval_calls = []
    escalation_reasons = []

    async def stub_evaluate_turn_llm(*args, **kwargs):
        llm_eval_calls.append(True)
        return "failure", "LLM flagged failure"

    async def stub_escalate_and_learn(user_request, tool_results, agent_reply, failure_reason, owner):
        escalation_reasons.append(failure_reason)
        return "skill-slug"

    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_llm", stub_evaluate_turn_llm)
    monkeypatch.setattr("src.teacher_escalation.escalate_and_learn", stub_escalate_and_learn)

    background = teacher_escalation.maybe_escalate(
        student_endpoint_url="http://student.local/v1",
        mode="agent",
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        owner="alice",
    )

    assert background is not None
    assert background.get_name() == "teacher_escalation_tier2"

    # Let the background task run to completion before inspecting the stubs.
    await background

    assert llm_eval_calls == [True]
    assert escalation_reasons == ["LLM flagged failure"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_maybe_escalate_tier2_disabled_by_default(monkeypatch):
    """With Tier 2 off and the regex check ok, no background task is created."""
    settings = {
        "teacher_enabled": True,
        "teacher_model": "teacher-model",
        "teacher_tier2_enabled": False,
    }
    monkeypatch.setattr(
        "src.settings.get_setting",
        lambda key, default=None: settings.get(key, default),
    )

    # The regex tier reports no failure.
    monkeypatch.setattr(
        "src.teacher_escalation.evaluate_turn_regex",
        lambda *args: ("ok", None),
    )

    background = teacher_escalation.maybe_escalate(
        student_endpoint_url="http://student.local/v1",
        mode="agent",
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        owner="alice",
    )

    # Tier 2 is disabled, so nothing should have been scheduled.
    assert background is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_run_teacher_inline_triggers_tier2_escalation(monkeypatch):
    """With Tier 2 on, an LLM failure verdict leads to an inline teacher takeover."""
    settings = {
        "teacher_enabled": True,
        "teacher_model": "teacher-model",
        "teacher_tier2_enabled": True,
    }
    monkeypatch.setattr(
        "src.settings.get_setting",
        lambda key, default=None: settings.get(key, default),
    )
    monkeypatch.setattr(
        "src.ai_interaction._resolve_model",
        lambda spec, owner=None: ("http://teacher.local/v1", "teacher-model", {}),
    )

    # The regex tier says "ok"; the LLM tier flags "failure".
    monkeypatch.setattr(
        "src.teacher_escalation.evaluate_turn_regex",
        lambda *args: ("ok", None),
    )

    async def stub_evaluate_turn_llm(*args, **kwargs):
        return "failure", "LLM flagged failure"

    # Stand-in for the agent loop stream consumed by run_teacher_inline.
    async def stub_stream_agent_loop(*args, **kwargs):
        for chunk in (
            'data: {"type": "tool_output", "tool": "bash"}\n\n',
            'data: {"type": "text", "delta": "Teacher reply"}\n\n',
            "data: [DONE]\n\n",
        ):
            yield chunk

    # The teacher answers with a fenced JSON skill definition.
    async def stub_call_teacher(spec, prompt, owner=None):
        return '```json\n{"action": "add", "name": "test-skill"}\n```'

    async def stub_do_manage_skills(skill_json, owner=None):
        return {"success": True}

    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_llm", stub_evaluate_turn_llm)
    monkeypatch.setattr("src.agent_loop.stream_agent_loop", stub_stream_agent_loop)
    monkeypatch.setattr("src.teacher_escalation._call_teacher", stub_call_teacher)
    monkeypatch.setattr("src.tool_implementations.do_manage_skills", stub_do_manage_skills)

    events = [
        evt
        async for evt in teacher_escalation.run_teacher_inline(
            student_endpoint_url="http://student.local/v1",
            student_messages=[{"role": "user", "content": "test request"}],
            student_tool_events=[],
            student_reply="student reply",
            owner="alice",
        )
    ]

    # Make sure teacher takeover was announced and executed
    for marker in ("teacher_takeover", "tool_output", "skill_saved"):
        assert any(marker in evt for evt in events)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_run_teacher_inline_tier2_disabled_by_default(monkeypatch):
    """With Tier 2 off and the regex check ok, run_teacher_inline emits nothing."""
    settings = {
        "teacher_enabled": True,
        "teacher_model": "teacher-model",
        "teacher_tier2_enabled": False,
    }
    monkeypatch.setattr(
        "src.settings.get_setting",
        lambda key, default=None: settings.get(key, default),
    )

    # The regex tier says "ok".
    monkeypatch.setattr(
        "src.teacher_escalation.evaluate_turn_regex",
        lambda *args: ("ok", None),
    )

    events = [
        evt
        async for evt in teacher_escalation.run_teacher_inline(
            student_endpoint_url="http://student.local/v1",
            student_messages=[{"role": "user", "content": "test request"}],
            student_tool_events=[],
            student_reply="student reply",
            owner="alice",
        )
    ]

    # Should exit early without any events (no takeover)
    assert len(events) == 0
|
||||||
Reference in New Issue
Block a user