Odysseus v1.0

2026-06-16 17:55:26 -04:00 · 2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions
@@ -0,0 +1,644 @@
+"""Teacher-escalation loop for self-hosted models in agent mode.
+
+When the student (self-hosted) model finishes a turn, evaluate whether
+it succeeded. If it didn't, escalate to a SOTA teacher endpoint, which
+both produces a corrective reply AND writes a SKILL.md procedure so
+the student can do it itself next time.
+
+Trigger conditions (ALL must hold):
+  1. Agent mode (not chat mode).
+  2. The student's endpoint is self-hosted (not a known SOTA cloud API).
+  3. `teacher_model` setting is configured.
+
+Detection tiers:
+  Tier 1: regex on tool outputs + agent reply. Catches the "Unknown
+          action 'switch'" / "I don't have a tool" / "Could you tell
+          me which one?" type failures. Free, instant.
+  Tier 2 (TODO): LLM self-eval for ambiguous cases. Not in first cut.
+
+If Tier 1 fires FAILURE, call the teacher with the full failed
+context. Skill is only saved if the teacher's response itself passes
+the same regex eval — no point persisting a procedure the teacher
+itself wasn't confident about.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+
+# Hosts considered SOTA / paid APIs — if the student's endpoint URL
+# hits one of these, the loop is OFF (the user is already paying for
+# a top-tier model; no need to escalate).
+_SOTA_HOSTS = frozenset({
+    "api.openai.com", "api.anthropic.com",
+    "api.deepseek.com", "deepseek.com",
+    "api.mistral.ai", "api.cohere.com",
+    "api.together.xyz", "api.fireworks.ai",
+    "api.perplexity.ai", "api.x.ai",
+    "generativelanguage.googleapis.com", "api.groq.com",
+})
+
+
+def is_self_hosted(endpoint_url: str) -> bool:
+    """True if the endpoint is NOT a known SOTA cloud API.
+
+    Conservative — anything we don't positively recognise as SOTA is
+    treated as self-hosted. Better to over-escalate than to silently
+    add latency to a paid-API user's chat.
+    """
+    if not endpoint_url:
+        return True
+    try:
+        host = (urlparse(endpoint_url).hostname or "").lower()
+    except Exception:
+        return True
+    if not host:
+        return True
+    return host not in _SOTA_HOSTS
+
+
+# ── Tier 1: regex-based failure detection ──────────────────────────
+
+# Patterns that show up in tool RESULTS when the call failed.
+_TOOL_ERROR_PATTERNS = [
+    re.compile(r"^Unknown action\b", re.IGNORECASE),
+    re.compile(r"^Failed to\b", re.IGNORECASE),
+    re.compile(r"\bnot found\b", re.IGNORECASE),
+    re.compile(r"^Invalid\b", re.IGNORECASE),
+    re.compile(r"\berror:\s", re.IGNORECASE),
+]
+
+# Patterns that show up in the AGENT'S REPLY when it gave up or
+# couldn't pick a path. Different list — these aren't tool errors,
+# they're the model verbally admitting it doesn't know.
+_REPLY_GIVE_UP_PATTERNS = [
+    re.compile(r"\bI don't have (?:a )?tool\b", re.IGNORECASE),
+    re.compile(r"\bI can(?:'t|not) (?:do|find|figure)\b", re.IGNORECASE),
+    re.compile(r"\bI'?m not sure (?:which|how|what)\b", re.IGNORECASE),
+    re.compile(r"\b[Cc]ould you (?:tell me|specify|clarify)\b"),
+    re.compile(r"\bunable to (?:open|find|switch|complete)\b", re.IGNORECASE),
+    re.compile(r"\bdoesn'?t (?:exist|appear to be|seem to)\b", re.IGNORECASE),
+]
+
+
+def evaluate_turn_regex(
+    tool_results: List[Dict[str, Any]],
+    agent_reply: str,
+) -> Tuple[str, Optional[str]]:
+    """Cheap regex check on a finished turn.
+
+    Returns ("failure", reason) on a detected problem, ("ok", None)
+    otherwise. The caller decides whether to short-circuit or fall
+    back to an LLM self-eval.
+    """
+    # Any tool returned an explicit error field?
+    for r in tool_results or []:
+        if not isinstance(r, dict):
+            continue
+        if r.get("error"):
+            return ("failure", f"tool returned error: {r.get('error')!r}")
+        text = r.get("results") or r.get("output") or r.get("response") or ""
+        if isinstance(text, str):
+            for pat in _TOOL_ERROR_PATTERNS:
+                if pat.search(text):
+                    snippet = text[:120].strip()
+                    return ("failure", f"tool result matched error pattern {pat.pattern!r}: {snippet!r}")
+
+    # Agent verbally gave up?
+    if agent_reply:
+        for pat in _REPLY_GIVE_UP_PATTERNS:
+            m = pat.search(agent_reply)
+            if m:
+                return ("failure", f"agent reply matched give-up pattern {pat.pattern!r}")
+
+    return ("ok", None)
+
+
+# ── Teacher escalation ────────────────────────────────────────────
+
+# Prompt template the teacher gets. The teacher is expected to (a)
+# describe how it would solve the task, and (b) emit a JSON skill
+# blob the caller can pass straight to manage_skills(add).
+_TEACHER_ESCALATION_PROMPT = """\
+You are the senior teacher model for an AI agent that runs on a smaller, \
+self-hosted student model. The student just failed at a task. Your job \
+is to write a permanent SKILL.md procedure so the student succeeds next \
+time.
+
+The student's tools include (non-exhaustive): bash, python, web_search, \
+read_file, write_file, create_document, edit_document, manage_session \
+(list/switch/rename/archive/delete/important/truncate/fork), \
+list_sessions, manage_memory, manage_notes, manage_calendar, \
+send_email, list_emails, manage_settings, manage_skills, \
+manage_tasks, ui_control. The student also understands the markdown \
+anchor convention [Name](#session-<id>) / [Title](#document-<id>) for \
+clickable jump links.
+
+THE TASK
+{user_request}
+
+WHY THE STUDENT FAILED
+{failure_reason}
+
+WHAT THE STUDENT TRIED (tool calls + replies in order)
+{trace}
+
+YOUR JOB
+Respond with TWO sections, in this exact order:
+
+1. A short paragraph explaining the correct procedure in plain English.
+
+2. A fenced JSON code block matching this schema for manage_skills(add):
+
+```json
+{{
+  "action": "add",
+  "name": "<short-kebab-case-slug>",
+  "description": "<one-line summary of what this skill teaches>",
+  "when_to_use": "<the trigger pattern: e.g. 'When the user says \\"open my X chat\\"'>",
+  "procedure": [
+    "Step 1: ...",
+    "Step 2: ...",
+    "Step 3: ..."
+  ],
+  "pitfalls": ["..."],
+  "verification": ["..."],
+  "category": "<single category word>",
+  "status": "draft",
+  "confidence": 0.8,
+  "source": "teacher-escalation"
+}}
+```
+
+The procedure steps should reference SPECIFIC tool names and argument \
+shapes the student can copy. Be concrete — not "use the right tool", \
+but "call list_sessions, find the row whose name contains <X>, then \
+respond with `[Name](#session-<id>)`".
+
+**PORTABILITY — CRITICAL.** Skills are shared across users. Do NOT \
+hardcode anything user-specific into the procedure:
+  - NO hostnames or IPs (e.g. `gpu-box`, `user@192.0.2.10`) — \
+    use placeholders like `<gpu_host>` or call `list_serve_presets` / \
+    `list_cached_models` to discover them at runtime.
+  - NO absolute filesystem paths tied to one machine (e.g. \
+    `/home/<user>/vllm-env/bin/vllm`) — say "use the user's vLLM \
+    install" or call the wrapped tool that picks the right binary.
+  - NO model repo IDs the user happened to pick this time unless the \
+    skill is specifically about THAT model — generalise to "the model \
+    the user named, looked up via list_cached_models / search_hf_models".
+  - NO tmux session names invented in the failed trace — these are \
+    one-shot artefacts. The named tool (`serve_model`, `stop_served_model`) \
+    owns session naming.
+  - NO direct `ssh <host> 'tmux ...'` shell incantations even if that's \
+    what the failed trace did — those bypass the cookbook's state \
+    tracker. The skill must use `serve_model` / `stop_served_model` \
+    / `serve_preset`, not bash.
+
+If you do NOT believe the task is solvable with the available tools, \
+output the explanation paragraph but OMIT the JSON block entirely. \
+A bad procedure is worse than no procedure — only emit the JSON if \
+you are confident the steps will actually work AND the steps are \
+portable across users / hosts.
+"""
+
+
+async def _call_teacher(teacher_model_spec: str, prompt: str) -> Optional[str]:
+    """Call the configured teacher endpoint with the escalation prompt."""
+    from src.llm_core import llm_call_async
+    from src.ai_interaction import _resolve_model, _TEACHER_SYSTEM_PROMPT
+    try:
+        url, model, headers = _resolve_model(teacher_model_spec)
+    except Exception as e:
+        logger.warning(f"teacher endpoint not resolvable ({teacher_model_spec!r}): {e}")
+        return None
+    try:
+        return await llm_call_async(
+            url, model,
+            [
+                {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+            headers=headers,
+            timeout=120,
+        )
+    except Exception as e:
+        logger.warning(f"teacher call failed: {e}")
+        return None
+
+
+# Prompt used AFTER the teacher itself ran and succeeded — distill the
+# successful trace into a reusable SKILL.md. Different framing from the
+# original "you have to plan it" prompt because here the teacher has
+# already proven the steps work.
+_TEACHER_SKILL_FROM_TRACE_PROMPT = """\
+You are distilling a successful tool-use trace into a permanent \
+SKILL.md procedure so a smaller student model can reproduce it.
+
+ORIGINAL USER REQUEST
+{user_request}
+
+WHY THE STUDENT FAILED (you, the teacher, just succeeded where it didn't)
+{failure_reason}
+
+YOUR SUCCESSFUL TRACE (tool calls + your final reply, in order)
+{trace}
+
+Output ONE fenced JSON code block matching this schema and nothing else:
+
+```json
+{{
+  "action": "add",
+  "name": "<short-kebab-case-slug>",
+  "description": "<one-line summary of what this skill teaches>",
+  "when_to_use": "<the trigger pattern: 'When the user says X'>",
+  "procedure": [
+    "Step 1: <specific tool name and arg shape>",
+    "Step 2: ...",
+    "Step 3: ..."
+  ],
+  "pitfalls": ["..."],
+  "verification": ["..."],
+  "category": "<single category word>",
+  "status": "draft",
+  "confidence": 0.8,
+  "source": "teacher-escalation"
+}}
+```
+
+The procedure must be the steps that ACTUALLY worked in the trace, \
+generalised away from this specific request. Each step references a \
+SPECIFIC tool name and argument shape the student can copy.
+
+**PORTABILITY — CRITICAL.** Skills are shared across users. Strip every \
+user-specific token from your trace before writing the procedure:
+  - Replace hostnames/IPs with placeholders (`<gpu_host>` etc.) or \
+    instruct the student to discover them via `list_serve_presets` / \
+    `list_cached_models` at runtime.
+  - Replace user-specific paths (`/home/<user>/...`) with the wrapped \
+    tool that picks the right binary on whatever machine runs the skill.
+  - Don't bake in the specific model repo_id you happened to use unless \
+    the skill is about that exact model.
+  - Reference the high-level tools (`serve_model`, `stop_served_model`, \
+    `serve_preset`, `list_cached_models`, `search_hf_models`, etc.) \
+    rather than `ssh <host> 'tmux new-session ... vllm serve ...'` \
+    shell incantations — even if THAT'S what worked in the trace. Raw \
+    shell launches bypass the cookbook tracker and don't reproduce on \
+    another user's box.
+
+If the trace did NOT genuinely solve the user's problem (e.g. you also \
+gave up, or the underlying issue was external infrastructure that no \
+procedure can fix), output the single token NO_SKILL and nothing else.
+"""
+
+
+def _extract_skill_json(teacher_response: str) -> Optional[Dict[str, Any]]:
+    """Find the first ```json {...}``` block and parse it.
+
+    Returns None if no block found or JSON is malformed — both
+    treated as "teacher declined to write a skill", per the prompt
+    contract.
+    """
+    if not teacher_response:
+        return None
+    import json
+    m = re.search(r"```(?:json)?\s*\n(\{[\s\S]*?\})\s*\n```", teacher_response)
+    if not m:
+        return None
+    try:
+        data = json.loads(m.group(1))
+        if not isinstance(data, dict):
+            return None
+        return data
+    except Exception:
+        return None
+
+
+def _format_trace(tool_results: List[Dict[str, Any]], agent_reply: str) -> str:
+    """Render the turn's tool calls + final reply for the teacher prompt."""
+    lines = []
+    for i, r in enumerate(tool_results or []):
+        if not isinstance(r, dict):
+            continue
+        tool = r.get("tool") or r.get("action") or "(unknown tool)"
+        if r.get("error"):
+            lines.append(f"- {tool}: ERROR {r['error']!r}")
+            continue
+        out = r.get("results") or r.get("output") or r.get("response") or ""
+        if isinstance(out, str) and len(out) > 400:
+            out = out[:400] + "..."
+        lines.append(f"- {tool}: {out!r}")
+    trace = "\n".join(lines) if lines else "(no tools called)"
+    if agent_reply:
+        snippet = agent_reply if len(agent_reply) < 800 else agent_reply[:800] + "..."
+        trace += f"\n\nFinal reply: {snippet!r}"
+    return trace
+
+
+async def escalate_and_learn(
+    user_request: str,
+    tool_results: List[Dict[str, Any]],
+    agent_reply: str,
+    failure_reason: str,
+    owner: Optional[str] = None,
+) -> Optional[str]:
+    """Call the teacher, evaluate ITS attempt, save a skill on success.
+
+    Returns the saved skill name (or None if the teacher couldn't
+    write one). Logs but doesn't raise — escalation is best-effort.
+    """
+    from src.settings import get_setting
+    teacher_spec = (get_setting("teacher_model", "") or "").strip()
+    if not teacher_spec:
+        return None
+
+    prompt = _TEACHER_ESCALATION_PROMPT.format(
+        user_request=user_request or "(no user request captured)",
+        failure_reason=failure_reason or "(failure reason not captured)",
+        trace=_format_trace(tool_results, agent_reply),
+    )
+    response = await _call_teacher(teacher_spec, prompt)
+    if not response:
+        return None
+
+    skill = _extract_skill_json(response)
+    if not skill:
+        # Teacher chose not to write a skill — see prompt contract.
+        logger.info("teacher declined to write a skill for this failure")
+        return None
+
+    # Same regex eval applied to the teacher's response — if the
+    # teacher itself sounded uncertain ("I don't have a tool"), drop
+    # the skill rather than persist a sketchy one.
+    status, reason = evaluate_turn_regex([], response)
+    if status == "failure":
+        logger.info(f"teacher response failed eval, skipping skill save: {reason}")
+        return None
+
+    # Tag the skill with the escalation source for auditability.
+    skill.setdefault("source", "teacher-escalation")
+    skill.setdefault("teacher_model", teacher_spec)
+    # Force action=add regardless of what the teacher wrote.
+    skill["action"] = "add"
+
+    import json
+    from src.tool_implementations import do_manage_skills
+    try:
+        result = await do_manage_skills(json.dumps(skill), owner=owner)
+        if isinstance(result, dict) and not result.get("error"):
+            logger.info(f"teacher wrote skill: {skill.get('name')}")
+            return skill.get("name")
+        logger.warning(f"skill save failed: {result}")
+    except Exception as e:
+        logger.warning(f"skill save raised: {e}")
+    return None
+
+
+def maybe_escalate(
+    *,
+    student_endpoint_url: str,
+    mode: str,
+    user_request: str,
+    tool_results: List[Dict[str, Any]],
+    agent_reply: str,
+    owner: Optional[str] = None,
+) -> Optional[asyncio.Task]:
+    """Fire-and-forget entrypoint called by the agent loop end-of-turn.
+
+    Returns the created asyncio.Task (so tests can await it) or None
+    if escalation didn't fire. Safe to call unconditionally — does
+    its own gating.
+    """
+    # Gate 1: only in agent mode.
+    if mode != "agent":
+        return None
+
+    # Gate 2: feature is enabled AND a teacher endpoint is configured.
+    # (No self-hosted-only gate — users run cheap cloud students like
+    # deepseek-v4-flash with a SOTA teacher; the toggle is the control.)
+    try:
+        from src.settings import get_setting
+        if not get_setting("teacher_enabled", False):
+            return None
+        if not (get_setting("teacher_model", "") or "").strip():
+            return None
+    except Exception:
+        return None
+
+    # Gate 3: regex eval — only escalate on detected failure.
+    status, reason = evaluate_turn_regex(tool_results, agent_reply)
+    if status != "failure":
+        return None
+
+    # Fire async — don't block the user's chat.
+    return asyncio.create_task(
+        escalate_and_learn(user_request, tool_results, agent_reply, reason or "", owner),
+        name="teacher_escalation",
+    )
+
+
+# ── Inline teacher takeover (visible in chat stream) ───────────────
+
+async def run_teacher_inline(
+    *,
+    student_endpoint_url: str,
+    student_messages: List[Dict[str, Any]],
+    student_tool_events: List[Dict[str, Any]],
+    student_reply: str,
+    owner: Optional[str] = None,
+):
+    """Async generator. Yields SSE event strings.
+
+    If escalation gates pass, runs the teacher inside the same chat
+    stream — the user sees the teacher's tool calls and reply live.
+    Saves a skill only if the teacher actually succeeded.
+
+    Gates (all must hold): agent mode (caller guarantees), teacher
+    toggle on, teacher_model configured, Tier 1 regex flags failure.
+    """
+    import json
+    from src.settings import get_setting
+
+    # Gates
+    try:
+        if not get_setting("teacher_enabled", False):
+            return
+        teacher_spec = (get_setting("teacher_model", "") or "").strip()
+        if not teacher_spec:
+            return
+    except Exception:
+        return
+
+    status, reason = evaluate_turn_regex(student_tool_events, student_reply)
+    if status != "failure":
+        return
+
+    # Extract original user request — last user-role message
+    user_request = ""
+    for m in reversed(student_messages):
+        if m.get("role") != "user":
+            continue
+        c = m.get("content")
+        if isinstance(c, str):
+            user_request = c
+        elif isinstance(c, list):
+            user_request = next(
+                (p.get("text", "") for p in c
+                 if isinstance(p, dict) and p.get("type") == "text"),
+                "",
+            )
+        break
+
+    # Resolve teacher endpoint
+    try:
+        from src.ai_interaction import _resolve_model
+        teacher_url, teacher_model, teacher_headers = _resolve_model(teacher_spec)
+    except Exception as e:
+        logger.warning(f"teacher endpoint not resolvable ({teacher_spec!r}): {e}")
+        yield (
+            'data: ' + json.dumps({
+                "type": "escalation_failed",
+                "reason": f"teacher endpoint not resolvable: {e}",
+            }) + '\n\n'
+        )
+        return
+
+    # Announce takeover so the frontend can render a banner
+    yield (
+        'data: ' + json.dumps({
+            "type": "teacher_takeover",
+            "teacher_model": teacher_spec,
+            "student_failure": reason,
+        }) + '\n\n'
+    )
+
+    # Build teacher messages. Strip the student's leading system
+    # prompts (the teacher's run will build its own fresh) but keep the
+    # user/assistant/tool history so the teacher sees what the student
+    # tried. The appended note leads with the user request text so RAG
+    # tool selection picks the right tools for the teacher's turn.
+    history = [m for m in student_messages if m.get("role") != "system"]
+    note_content = (
+        f"{user_request or '(no user request captured)'}\n\n"
+        "[teacher-takeover] The previous attempt by the student model "
+        f"failed.\nFailure signal: {reason}\n"
+        "Please solve the request above using your own tools. The user "
+        "is watching your tool calls live."
+    )
+    teacher_messages = history + [{"role": "user", "content": note_content}]
+
+    # Recursively invoke the agent loop with the teacher's params.
+    # The _is_teacher_run flag prevents infinite recursion (the teacher
+    # run will skip its own escalation hook).
+    from src.agent_loop import stream_agent_loop
+    captured_tool_events: List[Dict[str, Any]] = []
+    captured_text_parts: List[str] = []
+
+    async for evt_str in stream_agent_loop(
+        endpoint_url=teacher_url,
+        model=teacher_model,
+        messages=teacher_messages,
+        headers=teacher_headers,
+        owner=owner,
+        _is_teacher_run=True,
+    ):
+        # Swallow teacher's own [DONE] — outer loop emits the real one
+        if "[DONE]" in evt_str:
+            continue
+        if evt_str.startswith("data: "):
+            try:
+                payload = json.loads(evt_str[6:].strip())
+            except Exception:
+                yield evt_str
+                continue
+            if isinstance(payload, dict):
+                payload["teacher"] = True
+                typ = payload.get("type")
+                if typ == "tool_output":
+                    captured_tool_events.append({
+                        "tool": payload.get("tool"),
+                        "command": payload.get("command"),
+                        "output": payload.get("output"),
+                        "exit_code": payload.get("exit_code"),
+                    })
+                if "delta" in payload and isinstance(payload["delta"], str):
+                    captured_text_parts.append(payload["delta"])
+                yield 'data: ' + json.dumps(payload) + '\n\n'
+                continue
+        yield evt_str
+
+    teacher_text = "".join(captured_text_parts).strip()
+    t_status, t_reason = evaluate_turn_regex(captured_tool_events, teacher_text)
+    if t_status == "failure":
+        logger.info(f"teacher also failed: {t_reason}")
+        yield (
+            'data: ' + json.dumps({
+                "type": "escalation_failed",
+                "reason": t_reason,
+            }) + '\n\n'
+        )
+        return
+
+    # Teacher succeeded — distill its successful trace into a skill
+    prompt = _TEACHER_SKILL_FROM_TRACE_PROMPT.format(
+        user_request=user_request or "(no user request captured)",
+        failure_reason=reason or "",
+        trace=_format_trace(captured_tool_events, teacher_text),
+    )
+    skill_response = await _call_teacher(teacher_spec, prompt)
+    if skill_response and "NO_SKILL" in skill_response and not _extract_skill_json(skill_response):
+        logger.info("teacher declined to write a skill (NO_SKILL)")
+        yield (
+            'data: ' + json.dumps({
+                "type": "skill_save_failed",
+                "reason": "teacher said NO_SKILL (problem not reproducible)",
+            }) + '\n\n'
+        )
+        return
+    skill = _extract_skill_json(skill_response) if skill_response else None
+    if not skill:
+        yield (
+            'data: ' + json.dumps({
+                "type": "skill_save_failed",
+                "reason": "teacher did not emit valid skill JSON",
+            }) + '\n\n'
+        )
+        return
+
+    skill["action"] = "add"
+    skill.setdefault("source", "teacher-escalation")
+    skill.setdefault("teacher_model", teacher_spec)
+
+    import json as _json
+    from src.tool_implementations import do_manage_skills
+    try:
+        result = await do_manage_skills(_json.dumps(skill), owner=owner)
+        if isinstance(result, dict) and not result.get("error"):
+            logger.info(f"teacher succeeded; saved skill: {skill.get('name')}")
+            yield (
+                'data: ' + json.dumps({
+                    "type": "skill_saved",
+                    "name": skill.get("name"),
+                    "category": skill.get("category", "general"),
+                }) + '\n\n'
+            )
+        else:
+            yield (
+                'data: ' + json.dumps({
+                    "type": "skill_save_failed",
+                    "reason": str(result),
+                }) + '\n\n'
+            )
+    except Exception as e:
+        logger.warning(f"skill save raised: {e}")
+        yield (
+            'data: ' + json.dumps({
+                "type": "skill_save_failed",
+                "reason": str(e),
+            }) + '\n\n'
+        )