diff --git a/src/settings.py b/src/settings.py
index 2c6ffcfd3..f0db0e69e 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -152,6 +152,7 @@ DEFAULT_SETTINGS = {
     "utility_model_fallbacks": [],
     "teacher_model": "",
     "teacher_enabled": False,
+    "teacher_tier2_enabled": False,  # opt-in gate for Tier 2 (LLM self-evaluation)
     # Skills: minimum self-reported confidence for an auto-written (LLM-authored)
     # DRAFT skill to be injected into the agent prompt. Published skills always
     # qualify. Keeps low-confidence auto-skills out of context until they're
diff --git a/src/teacher_escalation.py b/src/teacher_escalation.py
index e4a75a026..bd1930325 100644
--- a/src/teacher_escalation.py
+++ b/src/teacher_escalation.py
@@ -420,8 +420,14 @@ async def evaluate_turn_llm(
             headers=headers,
             timeout=20,
         )
-        if response and "failure" in response.lower():
-            return ("failure", f"LLM evaluation flagged failure: {response.strip()}")
+        if response:
+            # Require the whole reply to be the verdict word: a substring
+            # match also fired on replies like "not a failure". Whitespace,
+            # quotes, markdown emphasis and sentence punctuation around the
+            # word are tolerated so '"Failure."' still counts.
+            cleaned_response = response.strip().strip(" \t\r\n'\"`*.!").lower()
+            if cleaned_response == "failure":
+                return ("failure", f"LLM evaluation flagged failure: {response.strip()}")
     except Exception as e:
         logger.warning(f"Tier 2 LLM self-eval failed: {e}")
 
@@ -529,6 +535,10 @@ def maybe_escalate(
             name="teacher_escalation",
         )
 
+    # Gate 4: Tier 2 LLM self-evaluation requires teacher_tier2_enabled
+    if not get_setting("teacher_tier2_enabled", False):
+        return None
+
     # Tier 2: LLM self-evaluation background task
     async def evaluate_and_maybe_escalate():
         llm_status, llm_reason = await evaluate_turn_llm(
@@ -597,7 +607,9 @@ async def run_teacher_inline(
 
     status, reason = evaluate_turn_regex(student_tool_events, student_reply)
     if status != "failure":
-        # Tier 2: LLM self-evaluation check
+        # Tier 2: LLM self-evaluation check requires teacher_tier2_enabled
+        if not get_setting("teacher_tier2_enabled", False):
+            return
         status, reason = await evaluate_turn_llm(
             user_request=user_request,
             tool_results=student_tool_events,
diff --git a/tests/test_teacher_eval_tier2.py b/tests/test_teacher_eval_tier2.py
index 3e49fef19..c3eb2a643 100644
--- a/tests/test_teacher_eval_tier2.py
+++ b/tests/test_teacher_eval_tier2.py
@@ -42,7 +42,7 @@ async def test_evaluate_turn_llm_failure(monkeypatch):
         return "http://endpoint.local/v1", "utility-model", {}
 
     async def fake_llm_call_async(url, model, messages, **kwargs):
-        return "this agent execution is a failure"
+        return " \"Failure.\" "
 
     monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", fake_resolve_endpoint)
     monkeypatch.setattr("src.llm_core.llm_call_async", fake_llm_call_async)
@@ -59,6 +59,30 @@ async def test_evaluate_turn_llm_failure(monkeypatch):
     assert "LLM evaluation flagged failure" in reason
 
 
+@pytest.mark.asyncio
+async def test_evaluate_turn_llm_contains_failure_but_not_exact_match(monkeypatch):
+    # A reply that merely mentions "failure" must not be read as a failure verdict
+    def fake_resolve_endpoint(prefix, fallback_url=None, owner=None):
+        return "http://endpoint.local/v1", "utility-model", {}
+
+    async def fake_llm_call_async(url, model, messages, **kwargs):
+        return "this agent execution is not a failure"
+
+    monkeypatch.setattr("src.endpoint_resolver.resolve_endpoint", fake_resolve_endpoint)
+    monkeypatch.setattr("src.llm_core.llm_call_async", fake_llm_call_async)
+
+    status, reason = await teacher_escalation.evaluate_turn_llm(
+        user_request="test request",
+        tool_results=[],
+        agent_reply="test reply",
+        student_endpoint_url="http://student.local/v1",
+        owner="alice",
+    )
+
+    assert status == "ok"
+    assert reason is None
+
+
 @pytest.mark.asyncio
 async def test_evaluate_turn_llm_exception_handling(monkeypatch):
     def fake_resolve_endpoint(prefix, fallback_url=None, owner=None):
@@ -86,7 +110,7 @@ async def test_evaluate_turn_llm_exception_handling(monkeypatch):
 @pytest.mark.asyncio
 async def test_maybe_escalate_triggers_tier2_background_task(monkeypatch):
     # Enable teacher settings
-    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model"}.get(key, default))
+    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model", "teacher_tier2_enabled": True}.get(key, default))
 
     # Regex check says OK
     monkeypatch.setattr("src.teacher_escalation.evaluate_turn_regex", lambda *args: ("ok", None))
@@ -125,10 +149,32 @@ async def test_maybe_escalate_triggers_tier2_background_task(monkeypatch):
     assert escalate_called == ["LLM flagged failure"]
 
 
+@pytest.mark.asyncio
+async def test_maybe_escalate_tier2_disabled_by_default(monkeypatch):
+    # Enable teacher settings; teacher_tier2_enabled is left unset so the caller-supplied default applies
+    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model"}.get(key, default))
+
+    # Regex check says OK
+    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_regex", lambda *args: ("ok", None))
+
+    # Call maybe_escalate
+    task = teacher_escalation.maybe_escalate(
+        student_endpoint_url="http://student.local/v1",
+        mode="agent",
+        user_request="test request",
+        tool_results=[],
+        agent_reply="test reply",
+        owner="alice",
+    )
+
+    # Should not start any background task since Tier 2 is disabled
+    assert task is None
+
+
 @pytest.mark.asyncio
 async def test_run_teacher_inline_triggers_tier2_escalation(monkeypatch):
     # Settings and gates
-    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model"}.get(key, default))
+    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model", "teacher_tier2_enabled": True}.get(key, default))
     monkeypatch.setattr("src.ai_interaction._resolve_model", lambda spec, owner=None: ("http://teacher.local/v1", "teacher-model", {}))
 
     # Regex evaluation says "ok"
@@ -170,3 +216,25 @@ async def test_run_teacher_inline_triggers_tier2_escalation(monkeypatch):
     assert any("teacher_takeover" in evt for evt in events)
     assert any("tool_output" in evt for evt in events)
     assert any("skill_saved" in evt for evt in events)
+
+
+@pytest.mark.asyncio
+async def test_run_teacher_inline_tier2_disabled_by_default(monkeypatch):
+    # Settings and gates (teacher_tier2_enabled left unset so the caller-supplied default applies)
+    monkeypatch.setattr("src.settings.get_setting", lambda key, default=None: {"teacher_enabled": True, "teacher_model": "teacher-model"}.get(key, default))
+
+    # Regex evaluation says "ok"
+    monkeypatch.setattr("src.teacher_escalation.evaluate_turn_regex", lambda *args: ("ok", None))
+
+    events = []
+    async for evt in teacher_escalation.run_teacher_inline(
+        student_endpoint_url="http://student.local/v1",
+        student_messages=[{"role": "user", "content": "test request"}],
+        student_tool_events=[],
+        student_reply="student reply",
+        owner="alice",
+    ):
+        events.append(evt)
+
+    # Should exit early without any events (no takeover)
+    assert events == []