mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-28 15:45:22 -04:00
feat(teacher): implement Tier 2 LLM self-evaluation
This commit is contained in:
@@ -366,6 +366,69 @@ def _format_trace(tool_results: List[Dict[str, Any]], agent_reply: str) -> str:
|
||||
return f"<<<UNTRUSTED_TRACE>>>\n{trace}\n<<<END_UNTRUSTED_TRACE>>>"
|
||||
|
||||
|
||||
_EVALUATE_TURN_LLM_PROMPT = """\
|
||||
You are an independent auditor evaluating a student AI agent's turn.
|
||||
Given the original request, the trace of tool calls and results, and the agent's final reply, determine whether the agent failed, gave up because it lacks the tools/capability/information, or encountered an error.
|
||||
|
||||
Respond with exactly one of these two words:
|
||||
- "failure" if the agent failed, gave up, encountered an error, or asked the user for clarification/missing tools.
|
||||
- "ok" if the agent successfully completed the task or is making correct progress.
|
||||
|
||||
ORIGINAL USER REQUEST:
|
||||
{user_request}
|
||||
|
||||
AGENT TRACE:
|
||||
{trace}
|
||||
|
||||
AGENT REPLY:
|
||||
{agent_reply}
|
||||
|
||||
EVALUATION:"""
|
||||
|
||||
|
||||
async def evaluate_turn_llm(
|
||||
user_request: str,
|
||||
tool_results: List[Dict[str, Any]],
|
||||
agent_reply: str,
|
||||
student_endpoint_url: str,
|
||||
owner: Optional[str] = None,
|
||||
) -> Tuple[str, Optional[str]]:
|
||||
"""Use a fast LLM (resolved via utility endpoint) to evaluate a turn."""
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.llm_core import llm_call_async
|
||||
|
||||
# Resolve utility model (falls back to default model, then student_endpoint_url)
|
||||
url, model, headers = resolve_endpoint(
|
||||
"utility",
|
||||
fallback_url=student_endpoint_url,
|
||||
owner=owner
|
||||
)
|
||||
if not url or not model:
|
||||
return ("ok", None)
|
||||
|
||||
trace_str = _format_trace(tool_results, agent_reply)
|
||||
prompt = _EVALUATE_TURN_LLM_PROMPT.format(
|
||||
user_request=user_request or "(no user request)",
|
||||
trace=trace_str,
|
||||
agent_reply=agent_reply or "(no agent reply)",
|
||||
)
|
||||
|
||||
try:
|
||||
response = await llm_call_async(
|
||||
url, model,
|
||||
[{"role": "user", "content": prompt}],
|
||||
headers=headers,
|
||||
timeout=20,
|
||||
)
|
||||
if response and "failure" in response.lower():
|
||||
return ("failure", f"LLM evaluation flagged failure: {response.strip()}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Tier 2 LLM self-eval failed: {e}")
|
||||
|
||||
return ("ok", None)
|
||||
|
||||
|
||||
|
||||
async def escalate_and_learn(
|
||||
user_request: str,
|
||||
tool_results: List[Dict[str, Any]],
|
||||
@@ -459,13 +522,28 @@ def maybe_escalate(
|
||||
|
||||
# Gate 3: regex eval — only escalate on detected failure.
|
||||
status, reason = evaluate_turn_regex(tool_results, agent_reply)
|
||||
if status != "failure":
|
||||
return None
|
||||
if status == "failure":
|
||||
# Fire async — don't block the user's chat.
|
||||
return asyncio.create_task(
|
||||
escalate_and_learn(user_request, tool_results, agent_reply, reason or "", owner),
|
||||
name="teacher_escalation",
|
||||
)
|
||||
|
||||
# Tier 2: LLM self-evaluation background task
|
||||
async def evaluate_and_maybe_escalate():
|
||||
llm_status, llm_reason = await evaluate_turn_llm(
|
||||
user_request=user_request,
|
||||
tool_results=tool_results,
|
||||
agent_reply=agent_reply,
|
||||
student_endpoint_url=student_endpoint_url,
|
||||
owner=owner,
|
||||
)
|
||||
if llm_status == "failure":
|
||||
await escalate_and_learn(user_request, tool_results, agent_reply, llm_reason or "", owner)
|
||||
|
||||
# Fire async — don't block the user's chat.
|
||||
return asyncio.create_task(
|
||||
escalate_and_learn(user_request, tool_results, agent_reply, reason or "", owner),
|
||||
name="teacher_escalation",
|
||||
evaluate_and_maybe_escalate(),
|
||||
name="teacher_escalation_tier2",
|
||||
)
|
||||
|
||||
|
||||
@@ -501,10 +579,6 @@ async def run_teacher_inline(
|
||||
except Exception:
|
||||
return
|
||||
|
||||
status, reason = evaluate_turn_regex(student_tool_events, student_reply)
|
||||
if status != "failure":
|
||||
return
|
||||
|
||||
# Extract original user request — last user-role message
|
||||
user_request = ""
|
||||
for m in reversed(student_messages):
|
||||
@@ -521,6 +595,19 @@ async def run_teacher_inline(
|
||||
)
|
||||
break
|
||||
|
||||
status, reason = evaluate_turn_regex(student_tool_events, student_reply)
|
||||
if status != "failure":
|
||||
# Tier 2: LLM self-evaluation check
|
||||
status, reason = await evaluate_turn_llm(
|
||||
user_request=user_request,
|
||||
tool_results=student_tool_events,
|
||||
agent_reply=student_reply,
|
||||
student_endpoint_url=student_endpoint_url,
|
||||
owner=owner,
|
||||
)
|
||||
if status != "failure":
|
||||
return
|
||||
|
||||
# Resolve teacher endpoint
|
||||
try:
|
||||
from src.ai_interaction import _resolve_model
|
||||
|
||||
Reference in New Issue
Block a user