_EVALUATE_TURN_LLM_PROMPT = """\
You are an independent auditor evaluating a student AI agent's turn.
Given the original request, the trace of tool calls and results, and the agent's final reply, determine whether the agent failed, gave up because it lacks the tools/capability/information, or encountered an error.

Respond with exactly one of these two words:
- "failure" if the agent failed, gave up, encountered an error, or asked the user for clarification/missing tools.
- "ok" if the agent successfully completed the task or is making correct progress.

ORIGINAL USER REQUEST:
{user_request}

AGENT TRACE:
{trace}

AGENT REPLY:
{agent_reply}

EVALUATION:"""

# Characters that models commonly wrap around a one-word verdict (quotes,
# markdown emphasis, trailing punctuation). They are stripped from the leading
# word before it is compared against the expected verdict words.
_VERDICT_WRAPPER_CHARS = "\"'`*_.,:;!()[]-"


def _parse_llm_verdict(response: str) -> str:
    """Reduce a raw auditor reply to ``"failure"`` or ``"ok"``.

    The prompt asks for exactly one word, so the leading word is authoritative:
    a reply that starts with "ok" is "ok" even if the explanation that follows
    happens to mention the word "failure" (e.g. ``ok - no failure detected``).
    Replies that do not start with "ok" fall back to a substring search so
    free-form answers such as ``this agent execution is a failure`` are still
    flagged.

    Args:
        response: Raw text returned by the evaluator model.

    Returns:
        ``"failure"`` or ``"ok"``; empty / whitespace-only text yields ``"ok"``.
    """
    lowered = response.lower()
    words = lowered.split()
    leading_word = words[0].strip(_VERDICT_WRAPPER_CHARS) if words else ""
    if leading_word == "ok":
        return "ok"
    return "failure" if "failure" in lowered else "ok"


async def evaluate_turn_llm(
    user_request: str,
    tool_results: List[Dict[str, Any]],
    agent_reply: str,
    student_endpoint_url: str,
    owner: Optional[str] = None,
) -> Tuple[str, Optional[str]]:
    """Use a fast LLM (resolved via utility endpoint) to evaluate a turn.

    Args:
        user_request: Original user request; a placeholder is substituted in
            the prompt when it is empty.
        tool_results: Tool call results for the turn, rendered into the prompt
            through ``_format_trace``.
        agent_reply: The agent's final reply; a placeholder is substituted in
            the prompt when it is empty.
        student_endpoint_url: Passed to the endpoint resolver as
            ``fallback_url``.
        owner: Optional owner forwarded to the endpoint resolver.

    Returns:
        ``("failure", reason)`` when the evaluator flags the turn, otherwise
        ``("ok", None)``. ``("ok", None)`` is also returned when no endpoint or
        model can be resolved, when the evaluator returns nothing, or when the
        LLM call raises (the error is logged as a warning) - evaluation is
        best-effort and never escalates on its own errors.
    """
    from src.endpoint_resolver import resolve_endpoint
    from src.llm_core import llm_call_async

    # Resolve utility model (falls back to default model, then student_endpoint_url)
    url, model, headers = resolve_endpoint(
        "utility",
        fallback_url=student_endpoint_url,
        owner=owner,
    )
    if not url or not model:
        return ("ok", None)

    prompt = _EVALUATE_TURN_LLM_PROMPT.format(
        user_request=user_request or "(no user request)",
        trace=_format_trace(tool_results, agent_reply),
        agent_reply=agent_reply or "(no agent reply)",
    )

    try:
        response = await llm_call_async(
            url,
            model,
            [{"role": "user", "content": prompt}],
            headers=headers,
            timeout=20,
        )
        # Parsing stays inside the try so an unexpected (non-text) response
        # degrades to "ok" exactly like a failed call does.
        if response and _parse_llm_verdict(response) == "failure":
            return ("failure", f"LLM evaluation flagged failure: {response.strip()}")
    except Exception as exc:
        logger.warning("Tier 2 LLM self-eval failed: %s", exc)

    return ("ok", None)
"""Tests for the Tier 2 (LLM-based) turn evaluation in ``src.teacher_escalation``."""
import asyncio
from types import SimpleNamespace
import pytest

import src.teacher_escalation as teacher_escalation


# Settings served by the patched ``src.settings.get_setting``.
_TEACHER_SETTINGS = {"teacher_enabled": True, "teacher_model": "teacher-model"}


def _fake_get_setting(key, default=None):
    """Stand-in for ``src.settings.get_setting`` with the teacher enabled."""
    return _TEACHER_SETTINGS.get(key, default)


def _regex_says_ok(*args):
    """Stand-in for ``evaluate_turn_regex`` that never detects a failure."""
    return ("ok", None)


def _patch_utility_llm(monkeypatch, llm_stub, record=None):
    """Route endpoint resolution and the LLM call to in-test stubs.

    When ``record`` is given, the resolver stub stores the ``prefix`` and
    ``owner`` it was called with in it.
    """

    def resolve_stub(prefix, fallback_url=None, owner=None):
        if record is not None:
            record["prefix"] = prefix
            record["owner"] = owner
        return "http://endpoint.local/v1", "utility-model", {}

    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", resolve_stub)
    monkeypatch.setattr("src.llm_core.llm_call_async", llm_stub)


async def _run_llm_eval():
    """Call ``evaluate_turn_llm`` with the fixed arguments shared by the tests."""
    return await teacher_escalation.evaluate_turn_llm(
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        student_endpoint_url="http://student.local/v1",
        owner="alice",
    )


@pytest.mark.asyncio
async def test_evaluate_turn_llm_ok(monkeypatch):
    record = {}

    async def llm_stub(url, model, messages, **kwargs):
        record["called"] = True
        return "ok"

    _patch_utility_llm(monkeypatch, llm_stub, record)

    status, reason = await _run_llm_eval()

    assert status == "ok"
    assert reason is None
    assert record["prefix"] == "utility"
    assert record["owner"] == "alice"
    assert record["called"] is True


@pytest.mark.asyncio
async def test_evaluate_turn_llm_failure(monkeypatch):
    async def llm_stub(url, model, messages, **kwargs):
        return "this agent execution is a failure"

    _patch_utility_llm(monkeypatch, llm_stub)

    status, reason = await _run_llm_eval()

    assert status == "failure"
    assert "LLM evaluation flagged failure" in reason


@pytest.mark.asyncio
async def test_evaluate_turn_llm_exception_handling(monkeypatch):
    async def llm_stub(url, model, messages, **kwargs):
        raise RuntimeError("model timeout")

    _patch_utility_llm(monkeypatch, llm_stub)

    # An evaluator error must degrade gracefully to "ok".
    status, reason = await _run_llm_eval()

    assert status == "ok"
    assert reason is None


@pytest.mark.asyncio
async def test_maybe_escalate_triggers_tier2_background_task(monkeypatch):
    llm_eval_calls = []
    escalation_reasons = []

    async def llm_eval_stub(*args, **kwargs):
        llm_eval_calls.append(True)
        return "failure", "LLM flagged failure"

    async def escalate_stub(user_request, tool_results, agent_reply, failure_reason, owner):
        escalation_reasons.append(failure_reason)
        return "skill-slug"

    # Teacher enabled, regex tier passes, LLM tier flags a failure.
    monkeypatch.setattr("src.settings.get_setting", _fake_get_setting)
    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_regex", _regex_says_ok)
    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_llm", llm_eval_stub)
    monkeypatch.setattr("src.teacher_escalation.escalate_and_learn", escalate_stub)

    task = teacher_escalation.maybe_escalate(
        student_endpoint_url="http://student.local/v1",
        mode="agent",
        user_request="test request",
        tool_results=[],
        agent_reply="test reply",
        owner="alice",
    )

    assert task is not None
    assert task.get_name() == "teacher_escalation_tier2"

    # Let the background task run to completion before inspecting the stubs.
    await task

    assert llm_eval_calls == [True]
    assert escalation_reasons == ["LLM flagged failure"]


@pytest.mark.asyncio
async def test_run_teacher_inline_triggers_tier2_escalation(monkeypatch):
    def resolve_model_stub(spec, owner=None):
        return ("http://teacher.local/v1", "teacher-model", {})

    async def llm_eval_stub(*args, **kwargs):
        return "failure", "LLM flagged failure"

    # Replaces the stream_agent_loop that run_teacher_inline calls recursively.
    async def stream_stub(*args, **kwargs):
        yield "data: {\"type\": \"tool_output\", \"tool\": \"bash\"}\n\n"
        yield "data: {\"type\": \"text\", \"delta\": \"Teacher reply\"}\n\n"
        yield "data: [DONE]\n\n"

    # The teacher answers with a skill definition.
    async def call_teacher_stub(spec, prompt, owner=None):
        return '```json\n{"action": "add", "name": "test-skill"}\n```'

    async def manage_skills_stub(skill_json, owner=None):
        return {"success": True}

    # Settings and gates: teacher enabled, regex tier "ok", LLM tier "failure".
    monkeypatch.setattr("src.settings.get_setting", _fake_get_setting)
    monkeypatch.setattr("src.ai_interaction._resolve_model", resolve_model_stub)
    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_regex", _regex_says_ok)
    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_llm", llm_eval_stub)
    monkeypatch.setattr("src.agent_loop.stream_agent_loop", stream_stub)
    monkeypatch.setattr("src.teacher_escalation._call_teacher", call_teacher_stub)
    monkeypatch.setattr("src.tool_implementations.do_manage_skills", manage_skills_stub)

    events = [
        evt
        async for evt in teacher_escalation.run_teacher_inline(
            student_endpoint_url="http://student.local/v1",
            student_messages=[{"role": "user", "content": "test request"}],
            student_tool_events=[],
            student_reply="student reply",
            owner="alice",
        )
    ]

    # Teacher takeover must be announced, executed, and end in a saved skill.
    for marker in ("teacher_takeover", "tool_output", "skill_saved"):
        assert any(marker in evt for evt in events)