Merge remote-tracking branch 'upstream/dev' into feat/llm-self-eval

2026-06-30 08:32:07 -04:00 · 2026-06-24 13:07:10 +05:30
parent 8fa10f9866 5ce2056521
commit 413e628a30
332 changed files with 30741 additions and 5444 deletions
@@ -267,6 +267,10 @@ _DOMAIN_RULES = {
 - Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
 - Use `manage_contact` to list, add, update, or delete contacts in the address book.
 - Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
+    "integrations": """\
+## Integration/API rules
+- To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
+- Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
 }

 _DOMAIN_TOOL_MAP = {
@@ -277,9 +281,10 @@ _DOMAIN_TOOL_MAP = {
    "notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
    "ui": {"ui_control"},
    "sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
-    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
+    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace", "manage_bg_jobs"},
    "settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
    "contacts": {"resolve_contact", "manage_contact"},
+    "integrations": {"api_call"},
 }

 def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -408,7 +413,7 @@ Generate an image. Line 1 = description, line 2 = model name, line 3 = WxH (e.g.
    "ask_teacher": "- ```ask_teacher``` — Escalate a hard question to a more capable model. Line 1 = model name or 'auto', rest = the question. Use when stuck or need expert knowledge.",
    "list_models": "- ```list_models``` — Show all available AI models across all endpoints. Use when user asks what models are available.",
    "manage_session": "- ```manage_session``` — Rename, archive, delete, fork, switch, or `list` chats (the UI calls them 'chats'; 'session' is internal). Line 1 = action (list/switch/rename/archive/unarchive/delete/important/unimportant/truncate/fork), Line 2 = exact chat id from `list_sessions` (or `current` where supported). For delete/archive/truncate, always list first and reuse the exact id; never invent placeholder ids. `switch`/`open` returns a clickable anchor link the user can tap to open the chat — use for \"open my X chat\".",
-    "manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts, identity, preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this', states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories.",
+    "manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts about the USER themselves, their preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this' about themselves, states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories. DO NOT use for info about another person (their address, phone, email, birthday) — that goes in `manage_contact`. If the user pastes an address/phone with a name and says 'save this for <person>', use `manage_contact add` with the address arg, NOT manage_memory.",
    "manage_skills": "- ```manage_skills``` — Skill registry (SKILL.md format). Args (JSON): {\"action\": \"list|view|view_ref|search|add|edit|patch|publish|delete\", ...}. `list` returns the index of available skills (published + teacher-escalation drafts); `view name=foo` fetches the full SKILL.md; `view_ref name=foo path=...` loads a reference file under the skill directory. For `add`, provide an explicit kebab-case `name` and only report the exact returned name, because storage may normalize or dedupe it. Use this BEFORE doing domain work — there may already be a procedure (published or draft) that prescribes the correct steps. Drafts written by the teacher loop are authoritative guidance even though they're not yet published.",
    "manage_tasks": "- ```manage_tasks``` — Create and manage scheduled background tasks (recurring AI jobs). Args (JSON): {\"action\": \"list|create|edit|delete|pause|resume|run\", ...}",
    "manage_endpoints": "- ```manage_endpoints``` — Add, remove, or configure AI model API endpoints. Args (JSON): {\"action\": \"list|add|delete|enable|disable\", ...}. Use when user wants to add a new AI provider.",
@@ -428,7 +433,9 @@ Notes, checklists, AND user reminders. Use this for "create/add/write a note", t
 ```send_email
 {"to": "recipient@example.com", "subject": "Re: Your question", "body": "Hi, ...", "account": "gmail"}
 ```
-Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.""",
+Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.
+
+CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
    "list_emails": """\
 ```list_emails
 {"folder": "INBOX", "max_results": 20, "unread_only": false, "account": "gmail"}
@@ -439,7 +446,9 @@ List recent emails from a folder, newest first, including read messages by defau
 ```reply_to_email
 {"uid": "1234", "body": "Sounds good — talk Friday.", "account": "gmail"}
 ```
-SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).""",
+SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).
+
+CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
    "bulk_email": """\
 ```bulk_email
 {"action": "delete", "uids": ["10997", "10998"], "folder": "INBOX", "account": "Gmail"}
@@ -449,7 +458,7 @@ Bulk delete/archive/mark emails. Use this for "delete all those" after listing e
    "archive_email": "- ```archive_email``` — Archive one email by UID. Args (JSON): {\"uid\":\"...\", \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
    "mark_email_read": "- ```mark_email_read``` — Mark one email read/unread. Args (JSON): {\"uid\":\"...\", \"read\":true, \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
    "resolve_contact": "- ```resolve_contact``` — Look up a contact's email by name. Searches CardDAV address book + sent email history. Args (JSON): {\"name\": \"...\"}. Use BEFORE send_email when the user gives only a name.",
-    "manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"uid\": \"...\"}. Use only for explicit address-book/contact requests with contact details. Do NOT use for user identity facts like 'my name is <name>'; save those with manage_memory. For update/delete, call action=list first to get the uid.",
+    "manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"phones\": [...], \"address\": \"...\", \"uid\": \"...\"}. Use for info about another person: email, phone, postal address. For 'save this for <person>' / address paste / phone next to a name, use this — NOT manage_memory. Do NOT use for user identity facts ('my name is X'); those are manage_memory. For update/delete, call action=list first for the uid.",
    "manage_calendar": """\
 ```manage_calendar
 {"action": "create_event", "summary": "<event title>", "dtstart": "<natural language or ISO datetime>"}
@@ -520,7 +529,7 @@ def get_builtin_overrides() -> dict:
        ov = get_setting("builtin_tool_overrides", {})
        return ov if isinstance(ov, dict) else {}
    except Exception as e:
-        logger.warning('Failed to load builtin tool overrides: %s', e)
+        logger.warning("Failed to load builtin tool overrides, using defaults", exc_info=e)
        return {}


@@ -532,17 +541,44 @@ def _section_text(name: str, default: str) -> str:
    return val if isinstance(val, str) and val.strip() else default


+def _compact_tool_line(name: str, section: str) -> str:
+    """One-line fenced-tool usage hint for compact/local prompts."""
+    text = (section or "").strip()
+    if not text:
+        return f"- `{name}`"
+    if text.startswith("- "):
+        return text
+    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
+    usage = []
+    in_fence = False
+    for ln in lines:
+        if ln.startswith("```"):
+            usage.append(ln)
+            in_fence = not in_fence
+            if len(usage) >= 3:
+                break
+            continue
+        if in_fence and len(usage) < 3:
+            usage.append(ln)
+    if usage:
+        return f"- `{name}` — " + " ".join(usage)
+    return f"- `{name}` — " + lines[0][:160]
+
+
 def _assemble_prompt(tool_names: set, disabled_tools: set = None, compact: bool = False) -> str:
    """Build the system prompt with only the specified tools included."""
    disabled = disabled_tools or set()
    included = tool_names - disabled

    if compact:
-        tool_list = ", ".join(sorted(included)) if included else "none"
+        tool_lines = []
+        for name, _default_section in TOOL_SECTIONS.items():
+            if name in included:
+                tool_lines.append(_compact_tool_line(name, _section_text(name, _default_section)))
        parts = [
-            "You are an AI assistant with tool access.",
-            f"Available tools: {tool_list}.",
-            _API_AGENT_RULES,
+            _AGENT_PREAMBLE,
+            "## Available tools\n" + ("\n".join(tool_lines) if tool_lines else "none"),
+            _AGENT_RULES,
        ]
        parts.extend(_domain_rules_for_tools(included))
        return "\n\n".join(parts)
@@ -608,11 +644,6 @@ _API_HOSTS = frozenset([
    "api.perplexity.ai", "api.x.ai",
    "ollama.com", "api.venice.ai", "api.kimi.com",
    "api.githubcopilot.com",
-    # Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
-    # Without these, `_is_api_model` falls back to keyword sniffing on the
-    # model name, so well-behaved local servers don't get native tool
-    # schemas and the agent silently degrades to fenced-block parsing.
-    "localhost", "127.0.0.1", "host.docker.internal",
 ])
 _MCP_KEYWORDS = frozenset(["mcp", "browse", "browser", "website", "calendar", "event", "email",
                           "gmail", "screenshot", "navigate", "click", "miniflux", "rss", "feed"])
@@ -640,6 +671,28 @@ def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
    return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/"))


+def _is_local_openai_compat_url(endpoint_url: str) -> bool:
+    try:
+        parsed = urlparse(endpoint_url or "")
+    except Exception:
+        return False
+    host = (parsed.hostname or "").lower()
+    path = (parsed.path or "").rstrip("/")
+    if not (path == "/v1" or path.startswith("/v1/")):
+        return False
+    if host in {"localhost", "127.0.0.1", "0.0.0.0", "host.docker.internal"}:
+        return True
+    if host.startswith("192.168.") or host.startswith("10."):
+        return True
+    if host.startswith("172."):
+        try:
+            second = int(host.split(".")[1])
+            return 16 <= second <= 31
+        except Exception:
+            return False
+    return False
+
+
 def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
    """Candidate ModelEndpoint.base_url keys for a runtime chat URL."""
    raw = (endpoint_url or "").strip()
@@ -703,6 +756,17 @@ def _extract_last_user_message(messages: List[Dict]) -> str:


 _LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
+# Matches a message that opens with a greeting, laugh, thanks, or similar
+# filler word. Everything after the opener is captured in the named group
+# "tail", which _is_casual_low_signal inspects. No DOTALL flag is set, so
+# "." does not cross newlines and a multi-line message does not match.
+_CASUAL_OPENING_RE = re.compile(
+    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
+    r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
+    re.IGNORECASE,
+)
+# Task-related keywords. _is_casual_low_signal searches the opener's tail for
+# these and refuses to classify the message as casual when any is present.
+_CASUAL_BLOCKLIST_RE = re.compile(
+    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
+    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
+    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
+    re.IGNORECASE,
+)
 _EXPLICIT_CONTINUATION_RE = re.compile(
    r"^\s*(?:"
    r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|"
@@ -712,6 +776,17 @@ _EXPLICIT_CONTINUATION_RE = re.compile(
    r")\s*[.!?]*\s*$",
    re.IGNORECASE,
 )
+# Retry/failure wording ("try again", "it crashed", ...) searched anywhere in
+# the latest user message by _is_contextual_retry_continuation.
+_RETRY_CONTINUATION_RE = re.compile(
+    r"\b(?:try again|retry|again|rerun|re-run|run it again|launch it again|"
+    r"start it again|failed|fails?|died|crashed|broke|insta|instantly)\b",
+    re.IGNORECASE,
+)
+# Model-serving / Cookbook vocabulary. _is_contextual_retry_continuation
+# searches the recent user context for these before treating a retry phrase
+# as a continuation of earlier work.
+_COOKBOOK_CONTEXT_RE = re.compile(
+    r"\b(?:cookbook|serve|serving|served|launch|start|preset|vllm|sglang|"
+    r"llama\.?cpp|ollama|download|cached models?|model servers?|running models?|"
+    r"gpu box|ajax|qwen|gemma|llama|mistral|minimax)\b",
+    re.IGNORECASE,
+)


 def _is_explicit_continuation(text: str) -> bool:
@@ -719,6 +794,37 @@ def _is_explicit_continuation(text: str) -> bool:
    return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip()))


+def _is_casual_low_signal(text: str) -> bool:
+    """True for short greetings/slang that should not inherit stale context."""
+    s = str(text or "").strip()
+    m = _CASUAL_OPENING_RE.match(s)
+    if not m:
+        return False
+    tail = m.group("tail") or ""
+    if _CASUAL_BLOCKLIST_RE.search(tail):
+        return False
+    # Allow a short vocative/address after the opener without hardcoding the
+    # address term itself: "hey man", "yo dude", "sup <name>". Longer tails are
+    # more likely to be an actual request and should get normal context/tooling.
+    tail_words = re.findall(r"[A-Za-z0-9_'-]+", tail)
+    return len(tail_words) <= 2
+
+
+def _is_contextual_retry_continuation(messages: List[Dict], text: str) -> bool:
+    """Treat "try again / it failed" as a continuation only for active tool work.
+
+    These follow-ups are common after Cookbook launches: the latest user turn
+    says only "try again it failed", while the actionable model/host/command
+    details live one or two turns back. Keep this intentionally narrow so
+    ordinary chat does not inherit stale Cookbook context.
+    """
+    latest = str(text or "").strip()
+    if not latest or not _RETRY_CONTINUATION_RE.search(latest):
+        return False
+    recent = _recent_context_for_retrieval(messages, max_user=5, max_chars=1200)
+    return bool(_COOKBOOK_CONTEXT_RE.search(recent))
+
+
 def _assistant_requested_followup(messages: List[Dict]) -> bool:
    """True when the previous assistant turn asked for missing task details.

@@ -760,11 +866,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
    which domain rule packs get appended to the system prompt.
    """
    text = str(last_user or "").strip()
-    continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages)
+    retry_continuation = _is_contextual_retry_continuation(messages, text)
+    continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) or retry_continuation
    retrieval_query = _recent_context_for_retrieval(messages) if continuation else text
    q = retrieval_query.lower()

-    if not text or bool(_LOW_SIGNAL_RE.match(text)):
+    if not text or bool(_LOW_SIGNAL_RE.match(text)) or _is_casual_low_signal(text):
        return {
            "low_signal": True,
            "continuation": False,
@@ -807,10 +914,25 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
        domains.add("sessions")
    if has(r"\b(file|folder|directory|repo|git|grep|find in files|read file|edit file|shell|terminal|bash|python)\b"):
        domains.add("files")
+    # Managing detached bash jobs: "kill the background job", "stop the job",
+    # "kill that job", "check the job output", "is the bg job done".
+    if (has(r"\b(background|bg)\s+(jobs?|task)\b")
+            or has(r"\b(kill|stop|cancel|terminate|check|tail|show|list)\b.{0,16}\bjobs?\b")
+            or has(r"\bjobs?\b.{0,16}\b(output|status|done|finished|running)\b")):
+        domains.add("files")
    if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
        domains.add("settings")
    if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
        domains.add("contacts")
+    # API-integration intent — calling a configured service via the api_call
+    # tool. Without this the #3794 repro ("Use the api_call tool to call Home
+    # Assistant GET /api/states") matched no domain, classified as low-signal,
+    # and the tool never reached the schema filter. Detect it explicitly so the
+    # "integrations" domain seeds api_call deterministically (see
+    # _DOMAIN_TOOL_MAP), independent of embedding retrieval.
+    if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
+           r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
+        domains.add("integrations")

    low_signal = not continuation and not domains
    return {
@@ -839,8 +961,11 @@ def _recent_context_for_retrieval(messages: List[Dict], max_user: int = 3, max_c
        if isinstance(content, list):
            content = " ".join(b.get("text", "") for b in content if isinstance(b, dict))
        content = (content or "").strip()
-        # Skip injected tool-result envelopes — role=user but not human intent.
-        if not content or content.startswith("[Tool execution results]"):
+        # Skip injected envelopes — role=user but not human intent. Tool results
+        # are now wrapped via untrusted_context_message (metadata.trusted=False);
+        # keep the legacy "[Tool execution results]" prefix for older histories.
+        meta = msg.get("metadata") or {}
+        if not content or meta.get("trusted") is False or content.startswith("[Tool execution results]"):
            continue
        collected.append(content)
        if len(collected) >= max_user:
@@ -859,6 +984,8 @@ def _build_system_prompt(
    compact: bool = False,
    owner: Optional[str] = None,
    suppress_local_context: bool = False,
+    suppress_skills: bool = False,
+    active_email: Optional[Dict[str, str]] = None,
 ) -> List[Dict]:
    """Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
    global _cached_base_prompt, _cached_base_prompt_key
@@ -875,7 +1002,7 @@ def _build_system_prompt(
        _ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
    except Exception:
        _ov_sig = ""
-    cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
+    cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context, suppress_skills)
    if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
        agent_prompt = _cached_base_prompt
        # Skill index is user-editable (name + description), so it must never
@@ -885,6 +1012,7 @@ def _build_system_prompt(
            disabled_tools, mcp_mgr, needs_admin, relevant_tools,
            mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
            suppress_local_context=suppress_local_context,
+            suppress_skills=suppress_skills,
        )
    else:
        agent_prompt, _skill_index_block = _build_base_prompt(
@@ -896,6 +1024,7 @@ def _build_system_prompt(
            compact=compact,
            owner=owner,
            suppress_local_context=suppress_local_context,
+            suppress_skills=suppress_skills,
        )
        if not active_document:
            _cached_base_prompt = agent_prompt
@@ -924,8 +1053,8 @@ def _build_system_prompt(
    try:
        from src.user_time import current_datetime_context_message
        _datetime_message = current_datetime_context_message()
-    except Exception:
-        pass
+    except Exception as e:
+        logger.warning("Failed to build datetime context message", exc_info=e)

    # Document context is kept as a SEPARATE message (not merged into the tool
    # prompt) so the context trimmer doesn't destroy it when truncating the
@@ -968,8 +1097,8 @@ def _build_system_prompt(
            try:
                from src.pdf_form_doc import find_source_upload_id
                _is_form_backed = bool(find_source_upload_id(active_document.current_content or ""))
-            except Exception:
-                pass
+            except Exception as e:
+                logger.warning("Failed to detect if document is form-backed, assuming plain", exc_info=e)

            if _is_form_backed:
                doc_ctx = (
@@ -1051,6 +1180,66 @@ def _build_system_prompt(
    else:
        set_active_document(None)

+    # Active email reader — frontend told us the user has an email open.
+    # Inject a context block so "reply", "summarize this", "what does it say"
+    # resolve to the real UID instead of the agent inventing a fresh .md
+    # draft with fake headers. This is the email equivalent of _doc_message.
+    _email_message = None
+    if active_email and active_email.get("uid"):
+        _em_uid = active_email.get("uid", "")
+        _em_folder = active_email.get("folder", "INBOX")
+        _em_account = active_email.get("account", "")
+        _em_subject = active_email.get("subject", "") or "(no subject)"
+        _em_from = active_email.get("from", "") or "(unknown sender)"
+        _em_preview = (active_email.get("body_preview", "") or "").strip()
+        _preview_block = f"\nBody preview:\n```\n{_em_preview[:1800]}\n```" if _em_preview else ""
+        _acct_arg = f" {_em_account}" if _em_account else ""
+        email_ctx = (
+            f"ACTIVE EMAIL OPEN (the user has this email open in a reader window right now)\n"
+            f"UID: {_em_uid}\n"
+            f"Folder: {_em_folder}\n"
+            f"Account: {_em_account or '(default)'}\n"
+            f"From: {_em_from}\n"
+            f"Subject: {_em_subject}{_preview_block}\n\n"
+            f"CRITICAL DEFAULT — every request about email this turn refers to "
+            f"THIS email unless the user names a DIFFERENT specific recipient "
+            f"(a name, an email address, or another thread). Examples that "
+            f"ALL mean reply-to-the-open-email:\n"
+            f"  • 'reply' / 'reply to this' / 'respond'\n"
+            f"  • 'write email saying X' / 'send email saying X' / 'draft something'\n"
+            f"  • 'tell them X' / 'say hi' / 'thanks' / 'ack' / 'lmk'\n"
+            f"  • 'summarize it' / 'what does it say' / 'tldr'\n"
+            f"  • 'forward this' / 'forward to <addr>'\n"
+            f"DO NOT ASK THE USER 'who do you want to send this to?' — the "
+            f"answer is ALWAYS the sender of the open email (above) unless they "
+            f"named someone else. Asking that is the wrong move every time.\n\n"
+            f"RULES for the open email:\n"
+            f"1. DRAFT a reply (default for any 'write/send/reply/tell them' "
+            f"request without a different recipient): call `ui_control` with "
+            f"`action=\"open_email_reply\"` and `extra=\"{_em_uid} {_em_folder} "
+            f"reply\"`. This opens the proper reply doc with To/Subject/"
+            f"In-Reply-To pre-filled by the backend. The user will see and edit "
+            f"it before sending. DO NOT `create_document` a markdown file with "
+            f"hand-written `To:` / `Subject:` / `In-Reply-To:` headers — that "
+            f"is wrong every time.\n"
+            f"2. SEND a reply immediately (skip the draft): call "
+            f"`reply_to_email` with the UID above. Only do this when the user "
+            f"explicitly says 'send' / 'send the reply' / 'reply and send'.\n"
+            f"3. READ the full body (the preview above may be truncated): "
+            f"call `read_email` with the UID/folder/account above.\n"
+            f"4. SUMMARIZE / answer questions about it: read it first, then "
+            f"answer in chat. Don't create a document for a summary unless "
+            f"the user explicitly asks for one.\n"
+            f"5. Never ask the user to paste the email or 'share it with you' "
+            f"— you already have its identity above and can read the full body.\n"
+            f"6. The ONLY time you ask 'who to send to?' is when the user "
+            f"explicitly says 'send a NEW email to someone else' or names a "
+            f"recipient you can't identify. A bare 'send email saying X' = the "
+            f"open email's sender.\n"
+        )
+        _email_message = untrusted_context_message("active email reader", email_ctx)
+        _email_message["_protected"] = True
+
    # Inject writing style for any email writing path. This is deliberately
    # broader than read/list: models may compose via send_email, reply_to_email,
    # or ui_control open_email_reply after the first tool round.
@@ -1119,7 +1308,7 @@ def _build_system_prompt(
    # few. If the teacher wrote a procedure for "open my X chat" last
    # time the student failed, this is where the student finds it
    # before deciding which tool to call.
-    if not suppress_local_context:
+    if not suppress_local_context and not suppress_skills:
        try:
            last_user = _extract_last_user_message(messages)
            # Respect the user's skills-enabled toggle (mirrors memory_enabled).
@@ -1258,6 +1447,9 @@ def _build_system_prompt(
    if _doc_message:
        merged.insert(last_user_idx, _doc_message)
        last_user_idx += 1  # the document message is now at last_user_idx
+    if _email_message:
+        merged.insert(last_user_idx, _email_message)
+        last_user_idx += 1
    if _skills_message:
        merged.insert(last_user_idx, _skills_message)
        last_user_idx += 1
@@ -1283,6 +1475,7 @@ def _build_base_prompt(
    compact: bool = False,
    owner: Optional[str] = None,
    suppress_local_context: bool = False,
+    suppress_skills: bool = False,
 ):
    """Build the agent prompt with only relevant tools included.

@@ -1292,12 +1485,18 @@ def _build_base_prompt(
    from src.tool_index import ALWAYS_AVAILABLE

    disabled = set(disabled_tools or [])
-    if not get_setting("image_gen_enabled", True):
+    if not get_setting("image_gen_enabled", False):
        disabled.add("generate_image")

    if relevant_tools is not None:
-        # RAG mode: include always-available + retrieved + admin (if needed)
-        tool_names = set(ALWAYS_AVAILABLE) | set(relevant_tools)
+        # RAG mode: trust the relevant_tools set as already-composed.
+        # get_tools_for_query starts from ALWAYS_AVAILABLE and may
+        # *discard* tools that conflict with the query's intent (e.g.
+        # drop manage_memory for clear contact-save patterns). Unioning
+        # ALWAYS_AVAILABLE back in here used to silently undo those
+        # drops. Only force-include the irreducible loop primitives
+        # (ask_user, update_plan) as belt-and-suspenders.
+        tool_names = set(relevant_tools) | {"ask_user", "update_plan"}
        if needs_admin:
            tool_names |= _ADMIN_TOOLS
        agent_prompt = _assemble_prompt(tool_names, disabled, compact=compact)
@@ -1329,7 +1528,7 @@ def _build_base_prompt(
    # The caller wraps it in untrusted_context_message and ships it as a
    # user-role message — same treatment as the matched-skills block.
    skill_index_block = ""
-    if not suppress_local_context:
+    if not suppress_local_context and not suppress_skills:
        try:
            from services.memory.skills import SkillsManager
            from src.constants import DATA_DIR
@@ -1488,8 +1687,14 @@ def _append_tool_results(
        if round_reasoning:
            msg["reasoning_content"] = round_reasoning
        messages.append(msg)
+        # Tool output (shell/python stdout, file reads, fetched pages, email
+        # bodies, MCP results) is sourced from outside the server. Wrap it as
+        # untrusted data so prompt-injection inside a tool result is treated as
+        # data, not instructions — same hardening as skills (#788) and the
+        # web/RAG context. THREAT_MODEL.md lists tool output as a surface that
+        # must go through untrusted_context_message.
        messages.append(
-            {"role": "user", "content": f"[Tool execution results]\n\n{tool_output_text}"}
+            untrusted_context_message("tool execution results", tool_output_text)
        )


@@ -1738,6 +1943,7 @@ async def stream_agent_loop(
    max_tool_calls: int = 0,
    context_length: int = 0,
    active_document=None,
+    active_email: Optional[Dict[str, str]] = None,
    session_id: Optional[str] = None,
    disabled_tools: Optional[Set[str]] = None,
    owner: Optional[str] = None,
@@ -1747,6 +1953,7 @@ async def stream_agent_loop(
    approved_plan: Optional[str] = None,
    tool_policy: Optional[ToolPolicy] = None,
    workspace: Optional[str] = None,
+    forced_tools: Optional[Set[str]] = None,
    _is_teacher_run: bool = False,
 ) -> AsyncGenerator[str, None]:
    """Streaming agent loop generator.
@@ -1786,6 +1993,20 @@ async def stream_agent_loop(
    _needs_admin = _detect_admin_intent(messages)
    _last_user = _extract_last_user_message(messages)
    _intent = _classify_agent_request(messages, _last_user)
+    _low_signal_turn = bool(_intent.get("low_signal"))
+    _casual_low_signal_turn = _is_casual_low_signal(_last_user)
+    _direct_low_signal = (
+        _low_signal_turn
+        and not bool(_intent.get("continuation"))
+        and not plan_mode
+        and not approved_plan
+        and not guide_only
+        and (_casual_low_signal_turn or active_document is None)
+        and (_casual_low_signal_turn or not active_email)
+        and (_casual_low_signal_turn or not workspace)
+        and not forced_tools
+        and not relevant_tools
+    )
    # Tool retrieval uses the latest message by default. It may inherit recent
    # user turns only for explicit continuations ("yes", "do it", "1").
    _retrieval_query = str(_intent.get("retrieval_query") or _last_user)
@@ -1793,11 +2014,86 @@ async def stream_agent_loop(
        "[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r",
        _last_user[:120],
        bool(_intent.get("continuation")),
-        bool(_intent.get("low_signal")),
+        _low_signal_turn,
        sorted(_intent.get("domains") or []),
        _retrieval_query[:200],
    )
    _mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {}
+    if _direct_low_signal:
+        logger.info("[agent] direct low-signal reply path for latest=%r", _last_user[:80])
+        direct_messages = [{"role": "user", "content": _last_user}]
+        direct_response = ""
+        direct_start = time.time()
+        direct_actual_model = model
+        real_input_tokens = 0
+        real_output_tokens = 0
+        try:
+            async for chunk in stream_llm_with_fallback(
+                [(endpoint_url, model, headers)] + list(fallbacks or []),
+                direct_messages,
+                temperature=temperature,
+                max_tokens=min(max_tokens or 128, 128),
+                prompt_type=None,
+                tools=None,
+                timeout=int(get_setting("agent_stream_timeout_seconds", 300) or 300),
+                session_id=session_id,
+            ):
+                if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
+                    try:
+                        data = json.loads(chunk[6:])
+                    except json.JSONDecodeError:
+                        yield chunk
+                        continue
+                    if data.get("type") == "usage":
+                        usage = data.get("data", {}) or {}
+                        direct_actual_model = usage.get("model") or direct_actual_model
+                        real_input_tokens += usage.get("input_tokens", 0) or 0
+                        real_output_tokens += usage.get("output_tokens", 0) or 0
+                        continue
+                    if data.get("type") == "model_actual":
+                        direct_actual_model = data.get("model") or direct_actual_model
+                        data["requested_model"] = model
+                        yield f"data: {json.dumps(data)}\n\n"
+                        continue
+                    if data.get("type") == "fallback":
+                        direct_actual_model = data.get("answered_by") or direct_actual_model
+                        yield chunk
+                        continue
+                    if "delta" in data:
+                        if not data.get("thinking"):
+                            direct_response += data.get("delta", "")
+                        yield chunk
+                        continue
+                    yield chunk
+                elif chunk.startswith("event: "):
+                    yield chunk
+        except Exception as _direct_err:
+            # Best-effort path: log and fall through. The empty-response check
+            # below supplies the canned greeting, so a mid-stream failure never
+            # appends "Hey." to text the client has already received.
+            logger.warning("[agent] direct low-signal path failed: %s", _direct_err)
+
+        if not direct_response.strip():
+            fallback = "Hey."
+            direct_response = fallback
+            yield f"data: {json.dumps({'delta': fallback})}\n\n"
+
+        duration = time.time() - direct_start
+        metrics = {
+            "model": direct_actual_model,
+            "requested_model": model,
+            "input_tokens": real_input_tokens or estimate_tokens(direct_messages),
+            "output_tokens": real_output_tokens or max(len(direct_response) // 4, 1),
+            "total_time": round(duration, 2),
+            "response_time": round(duration, 2),
+            "agent_rounds": 0,
+            "tool_calls": 0,
+            "direct_low_signal": True,
+        }
+        yield f"data: {json.dumps({'type': 'metrics', 'data': metrics})}\n\n"
+        yield "data: [DONE]\n\n"
+        return
+
    if plan_mode and mcp_mgr:
        # Allow read-only MCP tools to investigate, block write/unknown ones:
        # hide them from the schemas AND reject them at runtime by qualified name.
@@ -1809,11 +2105,11 @@ async def stream_agent_loop(

    # RAG-based tool selection: retrieve relevant tools for this query.
    # If caller provided a pre-computed set (e.g. task_scheduler), use that.
-    _relevant_tools = set() if guide_only else relevant_tools
+    _relevant_tools = relevant_tools
    _t1 = time.time()
    if _relevant_tools:
        logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
-    if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
+    if not guide_only and not _relevant_tools and _low_signal_turn:
        from src.tool_index import ALWAYS_AVAILABLE
        if workspace:
            # An active workspace IS the file-work signal: a vague "look at the
@@ -1904,6 +2200,53 @@ async def stream_agent_loop(
    if _relevant_tools is not None and active_document is not None:
        _relevant_tools.update({"edit_document", "update_document", "suggest_document"})

+    # Per-request UI toggles are stronger than retrieval. If the user turns on
+    # Search, the model must see the search tools even when the latest text is a
+    # typo or otherwise low-signal for tool RAG. (disabled_tools defaults to None.)
+    if not guide_only and forced_tools:
+        if _relevant_tools is None:
+            from src.tool_index import ALWAYS_AVAILABLE
+            _relevant_tools = set(ALWAYS_AVAILABLE)
+        _relevant_tools.update(t for t in forced_tools if t not in (disabled_tools or ()))
+
+    # The skill index injected by _build_system_prompt tells the model to
+    # call `manage_skills action=view`, and Jaccard-matched skills are pasted
+    # into the prompt as procedures to follow — but neither path goes through
+    # tool selection, so the model can be handed a procedure naming tools
+    # (grep, read_file, ...) that aren't in its schema list. Keep the schemas
+    # in lockstep: manage_skills is callable whenever any skill is indexed,
+    # and a matched skill's declared requires_toolsets ride along with it.
+    if not guide_only and _relevant_tools is not None and not _low_signal_turn:
+        try:
+            from services.memory.skills import SkillsManager
+            from src.constants import DATA_DIR
+            _skills_on = True
+            try:
+                from routes.prefs_routes import _load_for_user as _load_prefs
+                _skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
+            except Exception:
+                pass
+            _sm = SkillsManager(DATA_DIR)
+            _owner_skills = _sm.load(owner=owner) if _skills_on else []
+            if _owner_skills:
+                _relevant_tools.add("manage_skills")
+                if _retrieval_query:
+                    # Validate against every known executable tool, not just
+                    # TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
+                    # schemas without a prompt-prose section.
+                    from src.tool_policy import known_tool_names
+                    _known = known_tool_names()
+                    for _sk in _sm.get_relevant_skills(
+                        _retrieval_query, skills=_owner_skills,
+                        threshold=0.25, max_items=3,
+                    ):
+                        _relevant_tools.update(
+                            t for t in (_sk.get("requires_toolsets") or [])
+                            if t in _known
+                        )
+        except Exception as _e:
+            logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
+
    if _relevant_tools is not None:
        logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])

@@ -1938,7 +2281,7 @@ async def stream_agent_loop(
    _model_supports_tools = any(kw in _model_lc for kw in (
        "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
        "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
-        "llama-3.3", "llama-4",
+        "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
        # Local-served models that follow OpenAI-style function calling
        # via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders
        # with the per-endpoint flag above.
@@ -1980,13 +2323,16 @@ async def stream_agent_loop(
        _is_api_model = False
    else:
        _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
+    _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat
    messages, mcp_schemas = _build_system_prompt(
        messages, model, active_document, mcp_mgr, disabled_tools,
        needs_admin=_needs_admin, relevant_tools=_relevant_tools,
        mcp_disabled_map=_mcp_disabled_map,
-        compact=_is_api_model,
+        compact=_compact_agent_prompt,
        owner=owner,
        suppress_local_context=guide_only,
+        suppress_skills=_low_signal_turn,
+        active_email=active_email,
    )
    if plan_mode and not guide_only:
        # Steer the model to investigate-then-propose. Hard tool gating handles
@@ -2071,6 +2417,14 @@ async def stream_agent_loop(
    # Strip internal metadata keys before sending to the LLM API
    messages = [{k: v for k, v in msg.items() if k != "_protected"} for msg in messages]

+    agent_prompt_tokens = estimate_tokens(messages)
+    logger.info(
+        "[agent-timing] prep_done model=%s prompt_tokens=%s context_length=%s prep=%s",
+        model,
+        agent_prompt_tokens,
+        context_length,
+        {k: round(v, 3) for k, v in prep_timings.items()},
+    )
    yield f"data: {json.dumps({'type': 'agent_prep', 'data': {k: round(v, 3) for k, v in prep_timings.items()}})}\n\n"

    full_response = ""
@@ -2167,9 +2521,17 @@ async def stream_agent_loop(
        elif _is_api_model:
            # Filter schemas by RAG-selected tools (if available)
            if _relevant_tools:
+                # _build_base_prompt unions _ADMIN_TOOLS into the prompt
+                # sections when admin intent fires — the schema list must
+                # offer the same names, or the model reads prose describing
+                # tools it cannot call and substitutes the nearest schema
+                # it does have (e.g. manage_memory for manage_skills).
+                _schema_names = set(_relevant_tools)
+                if _needs_admin:
+                    _schema_names |= _ADMIN_TOOLS
                base_schemas = [
                    s for s in FUNCTION_TOOL_SCHEMAS
-                    if s.get("function", {}).get("name") in _relevant_tools
+                    if s.get("function", {}).get("name") in _schema_names
                ]
                _mcp_filtered = [
                    s for s in mcp_schemas
@@ -2207,6 +2569,19 @@ async def stream_agent_loop(
        # complementary cap for the rare stream that trickles bytes forever and
        # so never trips the inactivity timeout. Generous — only catches runaway.
        _round_deadline = time.time() + max(agent_stream_timeout * 4, 1200)
+        _round_start = time.time()
+        _round_first_event_logged = False
+        _round_first_token_logged = False
+        logger.info(
+            "[agent-timing] round_start round=%s model=%s endpoint=%s prompt_tokens=%s tools=%s native_tools=%s timeout=%s",
+            round_num,
+            model,
+            endpoint_url,
+            estimate_tokens(messages),
+            len(_tool_names_sent),
+            bool(all_tool_schemas),
+            agent_stream_timeout,
+        )
        async for chunk in stream_llm_with_fallback(
            _candidates,
            messages,
@@ -2217,11 +2592,30 @@ async def stream_agent_loop(
            timeout=agent_stream_timeout,
            session_id=session_id,
        ):
+            if not _round_first_event_logged:
+                _round_first_event_logged = True
+                logger.info(
+                    "[agent-timing] first_event round=%s elapsed=%.3fs kind=%s",
+                    round_num,
+                    time.time() - _round_start,
+                    "error" if chunk.startswith("event: error") else "data",
+                )
            if time.time() > _round_deadline:
-                logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
+                logger.warning(
+                    "[agent-timing] round_deadline round=%s elapsed=%.3fs deadline_s=%s",
+                    round_num,
+                    time.time() - _round_start,
+                    max(agent_stream_timeout * 4, 1200),
+                )
                break
            # Forward error events from stream_llm to the frontend
            if chunk.startswith("event: error"):
+                logger.warning(
+                    "[agent-timing] stream_error round=%s elapsed=%.3fs chunk=%r",
+                    round_num,
+                    time.time() - _round_start,
+                    chunk[:500],
+                )
                yield chunk
                continue
            if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
@@ -2301,6 +2695,15 @@ async def stream_agent_loop(
                        if not first_token_received:
                            time_to_first_token = time.time() - total_start
                            first_token_received = True
+                        if not _round_first_token_logged:
+                            _round_first_token_logged = True
+                            logger.info(
+                                "[agent-timing] first_visible_token round=%s elapsed=%.3fs total_elapsed=%.3fs thinking=%s",
+                                round_num,
+                                time.time() - _round_start,
+                                time.time() - total_start,
+                                bool(data.get("thinking")),
+                            )
                        # Keep reasoning deltas in a separate accumulator so
                        # we can echo them back via `reasoning_content` on the
                        # next request (DeepSeek requires this; harmless for
@@ -2370,7 +2773,21 @@ async def stream_agent_loop(
                yield chunk
            # Intercept [DONE] — don't forward until all rounds finish

-        tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model)
+        logger.info(
+            "[agent-timing] round_stream_done round=%s elapsed=%.3fs text_chars=%s tool_calls=%s first_event=%s first_token=%s",
+            round_num,
+            time.time() - _round_start,
+            len(round_response),
+            len(native_tool_calls),
+            _round_first_event_logged,
+            _round_first_token_logged,
+        )
+        tool_blocks, used_native = _resolve_tool_blocks(
+            round_response,
+            native_tool_calls,
+            round_num,
+            is_api_model=(_is_api_model and not guide_only),
+        )

        # Force-answer round: we told the model to STOP calling tools and
        # answer. If it ignored that and emitted a (possibly DSML) tool
@@ -2454,7 +2871,7 @@ async def stream_agent_loop(
        # model with no real native_tool_calls) must not be stripped from the
        # persisted text either — otherwise it streams once and then disappears
        # on reload (#3222 follow-up).
-        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip()
+        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip()
        round_texts.append(cleaned_round)

        if not tool_blocks:
@@ -2526,6 +2943,15 @@ async def stream_agent_loop(
                _intent_nudge_count += 1
                _matched_phrase = _intent_match.group(0).strip()
                logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
+                _lower_phrase = _matched_phrase.lower()
+                _cookbook_log_hint = ""  # optional sentence spliced into the nudge text
+                if any(_word in _lower_phrase for _word in ("log", "logs", "output", "tail", "status")):
+                    _cookbook_log_hint = (  # no leading space, one trailing: it sits between two sentences
+                        "If this is about a Cookbook/model serve, the concrete calls are: "
+                        "`list_served_models` first, then `tail_serve_output` with the "
+                        "session_id from the serve/list result. Never answer with "
+                        "\"check logs\" when those tools are available. "
+                    )
                messages.append({
                    "role": "system",
                    "content": (
@@ -2534,6 +2960,7 @@ async def stream_agent_loop(
                        "see you announced the action but didn't run it, which "
                        "is the most frustrating thing you can do. "
                        "DO IT NOW: emit the actual function call this turn. "
+                        f"{_cookbook_log_hint}"
                        "If you decided not to do it after all, say so plainly in "
                        "one sentence instead of restating the plan."
                    ),
@@ -2705,6 +3132,46 @@ async def stream_agent_loop(
                    )
                desc, result = await _tool_task

+            # A skill the model just loaded can prescribe tools that weren't
+            # RAG-selected this turn (declared via requires_toolsets in its
+            # frontmatter). Union them into the selection so the NEXT round's
+            # schema list includes them — otherwise the model reads "use
+            # grep" from the skill it fetched but has no grep schema to call.
+            if (
+                block.tool_type == "manage_skills"
+                and _relevant_tools is not None
+                and not result.get("error")
+            ):
+                _ms_args = {}
+                _ms_raw = (block.content or "").strip()
+                if _ms_raw.startswith("{"):
+                    try:
+                        _ms_args = json.loads(_ms_raw)
+                    except json.JSONDecodeError:
+                        _ms_args = {}
+                _ms_name = str(_ms_args.get("name", "") or "").strip()
+                if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
+                    try:
+                        from services.memory.skills import SkillsManager as _SkM
+                        from src.constants import DATA_DIR as _DD
+                        from src.tool_policy import known_tool_names as _ktn
+                        _known = _ktn()
+                        for _sk in _SkM(_DD).load(owner=owner):
+                            if _sk.get("name") == _ms_name:
+                                _new = {
+                                    t for t in (_sk.get("requires_toolsets") or [])
+                                    if t in _known and t not in _relevant_tools
+                                }
+                                if _new:
+                                    _relevant_tools.update(_new)
+                                    logger.info(
+                                        "[tool-rag] skill '%s' unlocked tools for next round: %s",
+                                        _ms_name, sorted(_new),
+                                    )
+                                break
+                    except Exception as _e:
+                        logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
+
            # Extract structured web sources from web_search tool output.
            # web_search returns {"output": ..., "exit_code": 0}; check "output"
            # first so the <!-- SOURCES:…--> marker is found and stripped even
@@ -2748,9 +3215,12 @@ async def stream_agent_loop(
                    f'data: {json.dumps({"type": "ui_control", "data": result})}\n\n'
                )

-            # ask_user: the agent posed a multiple-choice question. Emit it so the
-            # frontend renders clickable options, then end the turn (below) and
-            # wait — the user's pick becomes the next message.
+            # ask_user: remember the payload now, but emit the interactive event
+            # only *after* tool_output below.  Emitting it before tool_output let
+            # the subsequent tool-card rewrite/scroll push the choices out of
+            # view.  The payload is also copied into the persisted tool event so
+            # history reload can reconstruct an unanswered card.
+            _pending_ask_user_event = None
            if "ask_user" in result:
                # The question lives in the tool args. ChatMessage.to_dict()
                # replays only role+content to the model next turn — tool_event
@@ -2765,9 +3235,7 @@ async def stream_agent_loop(
                    _auq_delta = ("\n\n" if full_response.strip() else "") + _auq_q
                    full_response += _auq_delta
                    yield 'data: ' + json.dumps({"delta": _auq_delta}) + '\n\n'
-                yield (
-                    f'data: {json.dumps({"type": "ask_user", "data": result["ask_user"]})}\n\n'
-                )
+                _pending_ask_user_event = _auq
                _awaiting_user = True

            # update_plan: agent wrote back to the plan (ticked a step / revised).
@@ -2822,9 +3290,25 @@ async def stream_agent_loop(

            # Emit tool_output (include ui_event data if present)
            tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
+            if _pending_ask_user_event:
+                # Keep enough state in the streamed tool result for alternate
+                # clients to render the prompt without depending on event order.
+                tool_output_data["ask_user"] = _pending_ask_user_event
            if "ui_event" in result:
                tool_output_data["ui_event"] = result["ui_event"]
-                for k in ("toggle_name", "state", "mode", "model", "endpoint_url", "theme_name", "colors"):
+                for k in (
+                    "toggle_name", "state", "mode", "model", "endpoint_url",
+                    "theme_name", "colors",
+                    # ui_control open_email_reply payload — without these the
+                    # frontend openReplyDraft bails on undefined uid and the
+                    # reply window silently never opens.
+                    "uid", "folder", "account_id",
+                    # Optional pre-filled body for open_email_reply so the
+                    # agent can compose-and-open in one tool call.
+                    "body",
+                    # ui_control open_panel payload
+                    "panel",
+                ):
                    if k in result:
                        tool_output_data[k] = result[k]
            # Forward image data from generate_image tool
@@ -2840,6 +3324,14 @@ async def stream_agent_loop(
                tool_output_data["diff"] = result["diff"]
            yield f'data: {json.dumps(tool_output_data)}\n\n'

+            # This must be the final UI event for ask_user: the frontend appends
+            # the card below the now-settled tool node and cancels any between-
+            # round spinner.  The turn ends after the current tool batch.
+            if _pending_ask_user_event:
+                yield (
+                    f'data: {json.dumps({"type": "ask_user", "data": _pending_ask_user_event})}\n\n'
+                )
+
            # Native document tools open in the editor + carry the REAL doc id.
            # Emit a doc_update so the frontend opens/activates it and sends it
            # back as active_doc_id next turn (otherwise the agent can't "see"
@@ -2897,6 +3389,11 @@ async def stream_agent_loop(
            # this the diff shows live but vanishes from saved history.
            if result.get("diff"):
                tool_event["diff"] = result["diff"]
+            if _pending_ask_user_event:
+                # Persist the structured question with the tool event.  On a
+                # reload, chatRenderer can restore the card; a later user
+                # message removes it as answered.
+                tool_event["ask_user"] = _pending_ask_user_event
            tool_events.append(tool_event)
            if block.tool_type in _VERIFIER_EFFECTFUL_TOOLS:
                _effectful_used = True
@@ -174,8 +174,20 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
            next_seq += 1
        if run.status != "running":
            return
+        heartbeat_idx = 0
        while True:
-            seq, ev = await q.get()
+            try:
+                seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
+            except asyncio.TimeoutError:
+                # Keep slow local models/proxies alive while they prefill before
+                # the first token. SSE comments are ignored by the UI but reset
+                # browser/proxy idle timers, which prevents "empty response"
+                # disconnects on llama.cpp first-token latencies of 30s+.
+                if run.status == "running":
+                    heartbeat_idx += 1
+                    yield f": heartbeat {heartbeat_idx}\n\n"
+                    continue
+                seq, ev = (None, None)
            if seq is None:            # end sentinel
                while next_seq < len(run.buffer):   # flush any tail the sentinel raced
                    yield run.buffer[next_seq]
@@ -22,6 +22,14 @@ from .subprocess_tools import BashTool, PythonTool
 from .web_tools import WebSearchTool, WebFetchTool
 from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
 from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
+from .model_interaction_tools import ChatWithModelTool, AskTeacherTool, ListModelsTool
+from .bg_job_tools import ManageBgJobsTool
+from .session_tools import CreateSessionTool, ListSessionsTool, SendToSessionTool, ManageSessionTool
+from .admin_tools import (
+    ADMIN_TOOL_HANDLERS,
+    do_manage_endpoints, do_manage_mcp, do_manage_webhooks,
+    do_manage_tokens, do_manage_settings,
+)

 TOOL_HANDLERS = {
    "bash": BashTool().execute,
@@ -40,7 +48,17 @@ TOOL_HANDLERS = {
    "suggest_document": SuggestDocumentTool().execute,
    "manage_documents": ManageDocumentTool().execute,
    "get_workspace": GetWorkspaceTool().execute,
+    "chat_with_model": ChatWithModelTool().execute,
+    "ask_teacher": AskTeacherTool().execute,
+    "list_models": ListModelsTool().execute,
+    "manage_bg_jobs": ManageBgJobsTool().execute,
+    "create_session": CreateSessionTool().execute,
+    "list_sessions": ListSessionsTool().execute,
+    "send_to_session": SendToSessionTool().execute,
+    "manage_session": ManageSessionTool().execute,
 }
+# Config/integration admin tools (manage_endpoints/mcp/webhooks/tokens/settings).
+TOOL_HANDLERS.update(ADMIN_TOOL_HANDLERS)

 # ---------------------------------------------------------------------------
 # Constants (re-exported for backward compatibility — single source of truth
@@ -52,7 +70,7 @@ PYTHON_TIMEOUT = 30

 # Tool types that trigger execution
 TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
-             "grep", "glob", "ls", "get_workspace",
+             "grep", "glob", "ls", "get_workspace", "manage_bg_jobs",
             "create_document", "update_document", "edit_document",
             "search_chats",
             "chat_with_model", "create_session", "list_sessions",
@@ -127,10 +145,5 @@ from src.tool_implementations import (  # noqa: E402, F401
    do_search_chats,
    do_manage_skills,
    do_manage_tasks,
-    do_manage_endpoints,
-    do_manage_mcp,
-    do_manage_webhooks,
-    do_manage_tokens,
-    do_manage_settings,
    do_api_call,
 )
@@ -0,0 +1,784 @@
+"""Config/integration admin agent tools (TOOL_HANDLERS).
+
+Moved verbatim from tool_implementations.py as part of the tool-registry
+migration (#3629, the `admin_tools.py` bullet): manage_endpoints / manage_mcp /
+manage_webhooks / manage_tokens / manage_settings, plus manage_mcp's
+command-allowlist guard. Each impl keeps its `do_*(content, owner)` shape;
+ADMIN_TOOL_HANDLERS wraps them into registry `execute(content, ctx)` adapters
+via one factory.
+"""
+import json
+import os
+import re
+import logging
+from typing import Optional, Dict
+
+from src.tool_utils import get_mcp_manager, _parse_tool_args
+
+logger = logging.getLogger(__name__)
+
+
+async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict:
+    """Agent tool for model endpoints: list, add, delete, enable, disable.
+
+    ``content`` is the raw tool payload, decoded by ``_parse_tool_args``; its
+    ``action`` key selects the operation and defaults to ``"list"``. ``owner``
+    is part of the shared handler signature and is not read here.
+
+    The result always carries ``exit_code`` (0 on success, 1 on failure)
+    together with ``response`` (plus ``endpoints`` for a listing) or ``error``.
+    """
+    from core.database import SessionLocal, ModelEndpoint
+    import uuid as _uuid
+    from datetime import datetime
+
+    try:
+        params = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+
+    action = params.get("action", "list")
+    session = SessionLocal()
+    try:
+        if action == "list":
+            listing = []
+            for row in session.query(ModelEndpoint).all():
+                listing.append({
+                    "id": row.id,
+                    "name": row.name,
+                    "base_url": row.base_url,
+                    "is_enabled": row.is_enabled,
+                })
+            return {"response": f"{len(listing)} endpoints", "endpoints": listing, "exit_code": 0}
+
+        if action == "add":
+            name = params.get("name", "")
+            base_url = params.get("base_url", "")
+            api_key = params.get("api_key", "")
+            if not base_url:
+                return {"error": "base_url is required", "exit_code": 1}
+            new_id = str(_uuid.uuid4())[:8]
+            session.add(ModelEndpoint(
+                id=new_id,
+                name=name or base_url,
+                base_url=base_url,
+                api_key=api_key,
+                is_enabled=True,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            ))
+            session.commit()
+            return {"response": f"Added endpoint '{name or base_url}' (id: {new_id})", "exit_code": 0}
+
+        if action in ("delete", "enable", "disable"):
+            # All three actions address one existing row by its id.
+            target_id = params.get("endpoint_id", "")
+            row = session.query(ModelEndpoint).filter(ModelEndpoint.id == target_id).first()
+            if not row:
+                return {"error": f"Endpoint {target_id} not found", "exit_code": 1}
+            if action == "delete":
+                # Read the name before the row is deleted.
+                removed_name = row.name
+                session.delete(row)
+                session.commit()
+                return {"response": f"Deleted endpoint '{removed_name}'", "exit_code": 0}
+            row.is_enabled = action == "enable"
+            session.commit()
+            return {"response": f"Endpoint '{row.name}' {action}d", "exit_code": 0}
+
+        return {"error": f"Unknown action: {action}", "exit_code": 1}
+    except Exception as e:
+        logger.error(f"manage_endpoints error: {e}")
+        return {"error": str(e), "exit_code": 1}
+    finally:
+        session.close()
+
+
+# ---------------------------------------------------------------------------
+# MCP server management tool
+# ---------------------------------------------------------------------------
+
+# Parallel to routes/cookbook_helpers._validate_serve_cmd but deliberately the
+# opposite policy: that gate guards an admin-only serve command and allows
+# interpreters (python3/etc) because model-serving needs them, whereas this is
+# the model/prompt-injection-reachable manage_mcp path, so interpreters and
+# runners are denied here.
+#
+# Commands that can execute arbitrary code regardless of their arguments. These
+# are NEVER accepted on the manage_mcp agent path, even if an operator lists one
+# in ODYSSEUS_MCP_ALLOWED_COMMANDS -- a stdio server that genuinely needs an
+# interpreter or package runner must be registered via the trusted admin route.
+_MCP_DENIED_COMMANDS = frozenset({
+    "sh", "bash", "zsh", "fish", "dash", "ksh", "csh", "tcsh", "ash", "busybox",
+    "cmd", "command.com", "powershell", "pwsh",
+    "python", "pypy", "node", "nodejs", "deno", "bun", "ruby", "jruby",
+    "perl", "raku", "php", "lua", "luajit", "tclsh", "wish", "expect", "rscript",
+    "groovy", "scala", "elixir", "erl", "iex", "java", "javac", "jshell", "jbang",
+    "kotlin", "kotlinc", "dotnet", "mono", "swift", "osascript", "tsx", "ts-node",
+    "npx", "bunx", "uvx", "pipx", "npm", "pnpm", "yarn", "pip", "uv",
+    "gem", "cargo", "go", "bundle", "poetry", "conda", "mamba", "brew",
+    "apt", "apt-get", "yum", "dnf", "pacman", "apk",
+    "env", "xargs", "nohup", "setsid", "nice", "ionice", "time", "timeout",
+    "watch", "stdbuf", "unbuffer", "script", "ssh", "scp", "sshpass", "sudo",
+    "doas", "su", "make", "cmake", "docker", "podman", "kubectl", "find",
+    "awk", "gawk", "sed", "vi", "vim", "nvim", "emacs", "ed", "tee", "eval",
+})
+
+# Argv flags that make even an allowlisted binary execute inline code. Matched
+# by prefix so glued forms (-cimport os, --eval=...) are caught, not just the
+# exact-token form.
+_MCP_CODE_EXEC_SHORT_FLAGS = ("-c", "-e", "-m")
+_MCP_CODE_EXEC_LONG_FLAGS = ("--eval", "--exec", "--print", "--module", "--command", "--require")
+
+# Argument prefixes that _validate_mcp_command treats as URLs: any arg that
+# starts with one of these (compared case-insensitively) is rejected.
+_MCP_URL_SCHEMES = ("http://", "https://", "ftp://", "ftps://", "file://", "data:", "jar:", "blob:")
+
+# Shell metacharacters refused in command/args. Args are passed as an argv list
+# (no shell), but refusing these keeps the surface narrow and obvious.
+_MCP_SHELL_METACHARS = set(";|&$`><\n\r")
+
+# Env vars that let a child process load attacker-supplied code before main().
+_MCP_DANGEROUS_ENV = frozenset({
+    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "DYLD_INSERT_LIBRARIES",
+    "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH", "PYTHONPATH", "PYTHONSTARTUP",
+    "PYTHONHOME", "PYTHONEXECUTABLE", "NODE_OPTIONS", "NODE_PATH", "BASH_ENV",
+    "ENV", "SHELLOPTS", "PERL5LIB", "PERL5OPT", "RUBYOPT", "RUBYLIB", "GEM_PATH",
+    "R_PROFILE", "R_HOME", "PATH", "IFS", "PROMPT_COMMAND",
+})
+
+
+def _mcp_allowed_commands() -> set:
+    """Return the operator's allowlist of MCP launcher basenames.
+
+    Reads ODYSSEUS_MCP_ALLOWED_COMMANDS on every call and splits it on commas;
+    each entry is trimmed and lower-cased, and blank entries are dropped. An
+    unset or empty variable yields an empty set, i.e. nothing is allowed on the
+    agent path until an operator opts a binary in. Denied commands are
+    rejected even if listed here.
+    """
+    configured = os.environ.get("ODYSSEUS_MCP_ALLOWED_COMMANDS", "")
+    allowed = set()
+    for entry in configured.split(","):
+        trimmed = entry.strip()
+        if trimmed:
+            allowed.add(trimmed.lower())
+    return allowed
+
+
+def _validate_mcp_command(command, args, env) -> Optional[str]:
+    """Validate a model-supplied stdio MCP registration.
+
+    Closes the RCE where manage_mcp 'add' passed prompt-injection-controlled
+    command/args/env straight to a subprocess spawn (issue #438): a payload
+    smuggled into a skill description, memory entry, fetched page, or email body
+    could register a stdio server running arbitrary code as the app UID.
+
+    Args:
+        command: Launcher name. Must be a bare executable name that is not in
+            ``_MCP_DENIED_COMMANDS`` (by raw or version-stripped name) and that
+            is listed in ``_mcp_allowed_commands()``.
+        args: ``None``, a list of strings, or a JSON string encoding a list.
+        env: Falsy for "no env", a dict, or a JSON string encoding an object.
+            Only the keys are inspected.
+
+    Returns:
+        An error string if the registration must be rejected, else ``None``.
+    """
+    if not isinstance(command, str) or not command.strip():
+        return "command must be a non-empty string"
+    # Keep the string exactly as supplied for the metacharacter scan: callers
+    # use the value they passed in, not this function's stripped copy, and
+    # strip() would otherwise hide a leading/trailing "\n" or "\r" from it.
+    raw_command = command
+    command = command.strip()
+    if "/" in command or "\\" in command:
+        return "command must be a bare executable name, not a path"
+    if any(ch in _MCP_SHELL_METACHARS for ch in raw_command):
+        return "command contains shell metacharacters"
+    base = command.lower()
+    if base.endswith((".exe", ".cmd", ".bat")):
+        base = base.rsplit(".", 1)[0]
+    # Canonicalize a trailing version suffix so versioned aliases collapse to the
+    # family name (python3.11 -> python, node18 -> node, pip3 -> pip); both the
+    # raw basename and the canonical form are denied, so an operator cannot
+    # accidentally allowlist a runtime alias back into the path.
+    canon = re.sub(r"[-_.]?\d+(?:\.\d+)*$", "", base)
+    if base in _MCP_DENIED_COMMANDS or canon in _MCP_DENIED_COMMANDS:
+        return (
+            f"command '{command}' is not allowed on the agent MCP path: "
+            "interpreters, runtimes, package runners, and shells can execute "
+            "arbitrary code. Register such a server via the admin route instead."
+        )
+    # The deny list wins over the allowlist, so this check only runs for
+    # commands that are not denied.
+    if base not in _mcp_allowed_commands():
+        return (
+            f"command '{command}' is not in the MCP allowlist. Add it to "
+            "ODYSSEUS_MCP_ALLOWED_COMMANDS if you trust it, or register the "
+            "server via the admin route."
+        )
+
+    if args is not None:
+        if isinstance(args, str):
+            try:
+                args = json.loads(args)
+            except Exception:
+                return "args must be a JSON list"
+        if not isinstance(args, list):
+            return "args must be a list"
+        for a in args:
+            if not isinstance(a, str):
+                return "args must all be strings"
+            s = a.strip()
+            low = s.lower()
+            # Short flags are case-sensitive and matched by prefix; long flags
+            # are case-insensitive and matched exactly or as "--flag=...".
+            if any(s == f or s.startswith(f) for f in _MCP_CODE_EXEC_SHORT_FLAGS):
+                return f"arg '{a}' is a code-execution flag and is not allowed"
+            if any(low == f or low.startswith(f + "=") for f in _MCP_CODE_EXEC_LONG_FLAGS):
+                return f"arg '{a}' is a code-execution flag and is not allowed"
+            if any(low.startswith(u) for u in _MCP_URL_SCHEMES):
+                return f"arg '{a}' is a remote URL and is not allowed"
+            # Scanned on the unstripped arg so surrounding newlines are caught.
+            if any(ch in _MCP_SHELL_METACHARS for ch in a):
+                return f"arg '{a}' contains shell metacharacters"
+
+    if env:
+        if isinstance(env, str):
+            try:
+                env = json.loads(env)
+            except Exception:
+                return "env must be a JSON object"
+        if not isinstance(env, dict):
+            return "env must be an object"
+        for k in env:
+            if str(k).strip().upper() in _MCP_DANGEROUS_ENV:
+                return f"env var '{k}' can inject code into the child process and is not allowed"
+
+    return None
+
+
+async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
+    """Manage MCP servers: list, add, delete, enable, disable, reconnect, list_tools.
+
+    Args:
+        content: Raw tool payload, decoded with ``_parse_tool_args``. Its
+            ``action`` key selects the operation (default ``"list"``). ``add``
+            reads ``name``, ``command``, ``args`` and ``env``; the per-server
+            actions read ``server_id``.
+        owner: Not read here; accepted so all admin handlers share one shape.
+
+    Returns:
+        A dict that always has ``exit_code`` (0 ok, 1 failure) plus
+        ``response`` (and ``servers`` / ``tools`` for the listing actions) or
+        ``error``.
+
+    ``add`` always registers a stdio server and is the only action that goes
+    through ``_validate_mcp_command``.
+    """
+    try:
+        args = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+
+    action = args.get("action", "list")
+
+    if action == "list":
+        mcp = get_mcp_manager()
+        if not mcp:
+            return {"response": "No MCP manager available", "servers": [], "exit_code": 0}
+        from core.database import SessionLocal, McpServer
+        db = SessionLocal()
+        try:
+            servers = db.query(McpServer).all()
+            items = []
+            for s in servers:
+                st = mcp.get_server_status(s.id)
+                status = st.get("status", "disconnected")
+                tool_count = st.get("tool_count", 0)
+                items.append({"id": s.id, "name": s.name, "transport": s.transport,
+                              "is_enabled": s.is_enabled, "status": status,
+                              "tool_count": tool_count})
+            return {"response": f"{len(items)} MCP servers", "servers": items, "exit_code": 0}
+        finally:
+            db.close()
+
+    elif action == "add":
+        from core.database import SessionLocal, McpServer
+        import uuid as _uuid
+        from datetime import datetime
+        name = args.get("name", "")
+        command = args.get("command", "")
+        # Validate, store and spawn the very same string: the validator works
+        # on a stripped copy, so strip here rather than persisting padding.
+        if isinstance(command, str):
+            command = command.strip()
+        # An explicit JSON null must behave like an omitted key; otherwise it
+        # passes validation and then breaks json.loads() at connect time.
+        cmd_args = args.get("args")
+        if cmd_args is None:
+            cmd_args = []
+        # The validator treats every falsy env as "no env"; mirror that here so
+        # the stored/spawned value is always a real mapping.
+        env = args.get("env") or {}
+        if not name or not command:
+            return {"error": "name and command are required", "exit_code": 1}
+        # Validate BEFORE any DB write or spawn: a rejected registration must
+        # leave no enabled row (which would otherwise auto-reconnect on restart)
+        # and must not attempt a connection.
+        _mcp_err = _validate_mcp_command(command, cmd_args, env)
+        if _mcp_err:
+            return {"error": f"manage_mcp: refused unsafe server registration: {_mcp_err}", "exit_code": 1}
+        # Validation accepts JSON-encoded strings (already proven to decode to
+        # a list / object); decode once so storage and spawn see one shape.
+        if isinstance(cmd_args, str):
+            cmd_args = json.loads(cmd_args)
+        if isinstance(env, str):
+            env = json.loads(env)
+        sid = str(_uuid.uuid4())[:8]
+        db = SessionLocal()
+        try:
+            srv = McpServer(id=sid, name=name, transport="stdio", command=command,
+                            args=json.dumps(cmd_args),
+                            env=json.dumps(env),
+                            is_enabled=True, created_at=datetime.utcnow(), updated_at=datetime.utcnow())
+            db.add(srv)
+            db.commit()
+        finally:
+            db.close()
+        # Try to connect; a failed connection is logged but the row is kept.
+        mcp = get_mcp_manager()
+        tool_count = 0
+        if mcp:
+            try:
+                await mcp.connect_server(
+                    sid, name, "stdio", command=command,
+                    args=cmd_args,
+                    env=env,
+                )
+                st = mcp.get_server_status(sid)
+                tool_count = st.get("tool_count", 0)
+            except Exception as e:
+                logger.warning(f"MCP connect failed for {name}: {e}")
+        return {"response": f"Added MCP server '{name}' ({tool_count} tools)", "exit_code": 0}
+
+    elif action == "delete":
+        sid = args.get("server_id", "")
+        from core.database import SessionLocal, McpServer
+        db = SessionLocal()
+        try:
+            srv = db.query(McpServer).filter(McpServer.id == sid).first()
+            if not srv:
+                return {"error": f"Server {sid} not found", "exit_code": 1}
+            name = srv.name
+            mcp = get_mcp_manager()
+            if mcp:
+                try:
+                    await mcp.disconnect_server(sid)
+                except Exception as e:
+                    # Best effort: the row is removed regardless, but leave a
+                    # trace instead of discarding the failure silently.
+                    logger.warning(f"MCP disconnect failed for {name}: {e}")
+            db.delete(srv)
+            db.commit()
+            return {"response": f"Deleted MCP server '{name}'", "exit_code": 0}
+        finally:
+            db.close()
+
+    elif action == "reconnect":
+        sid = args.get("server_id", "")
+        mcp = get_mcp_manager()
+        if not mcp:
+            return {"error": "MCP manager not available", "exit_code": 1}
+        try:
+            await mcp.disconnect_server(sid)
+            from core.database import SessionLocal, McpServer
+            db2 = SessionLocal()
+            try:
+                srv = db2.query(McpServer).filter(McpServer.id == sid).first()
+                if srv:
+                    # Stored args/env are JSON text; empty columns mean none.
+                    _args = json.loads(srv.args) if srv.args else []
+                    _env = json.loads(srv.env) if srv.env else {}
+                    await mcp.connect_server(
+                        server_id=sid,
+                        name=srv.name,
+                        transport=srv.transport,
+                        command=srv.command,
+                        args=_args,
+                        env=_env,
+                        url=srv.url,
+                    )
+                    st = mcp.get_server_status(sid)
+                    return {"response": f"Reconnected '{srv.name}' ({st.get('tool_count', 0)} tools)", "exit_code": 0}
+                return {"error": f"Server {sid} not found", "exit_code": 1}
+            finally:
+                db2.close()
+        except Exception as e:
+            return {"error": str(e), "exit_code": 1}
+
+    elif action in ("enable", "disable"):
+        sid = args.get("server_id", "")
+        from core.database import SessionLocal, McpServer
+        db = SessionLocal()
+        try:
+            srv = db.query(McpServer).filter(McpServer.id == sid).first()
+            if not srv:
+                return {"error": f"Server {sid} not found", "exit_code": 1}
+            # Only the stored flag changes; no connect/disconnect happens here.
+            srv.is_enabled = (action == "enable")
+            db.commit()
+            return {"response": f"MCP server '{srv.name}' {action}d", "exit_code": 0}
+        finally:
+            db.close()
+
+    elif action == "list_tools":
+        mcp = get_mcp_manager()
+        if not mcp:
+            return {"response": "No MCP manager", "tools": [], "exit_code": 0}
+        tools = mcp.get_all_tools()
+        # `description` may be present but None; treat that like a missing one
+        # instead of failing on None[:100].
+        items = [{"name": t["name"], "server": t["server_name"],
+                  "description": (t.get("description") or "")[:100]} for t in tools]
+        return {"response": f"{len(items)} MCP tools available", "tools": items, "exit_code": 0}
+
+    else:
+        return {"error": f"Unknown action: {action}", "exit_code": 1}
+
+
+# ---------------------------------------------------------------------------
+# Webhook management tool
+# ---------------------------------------------------------------------------
+
+async def do_manage_webhooks(content: str, owner: Optional[str] = None) -> Dict:
+    """Agent tool for webhooks: list, add, delete, enable, disable.
+
+    ``content`` is the raw tool payload, decoded by ``_parse_tool_args``; its
+    ``action`` key selects the operation and defaults to ``"list"``. ``owner``
+    is part of the shared handler signature and is not read here.
+
+    For ``add`` the URL and event list go through ``validate_webhook_url`` and
+    ``validate_events``; a ``ValueError`` from either is returned as the error
+    text. The result always carries ``exit_code`` (0 success, 1 failure) with
+    ``response`` (plus ``webhooks`` for a listing) or ``error``.
+    """
+    from core.database import SessionLocal
+
+    try:
+        params = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+
+    action = params.get("action", "list")
+    session = SessionLocal()
+    try:
+        # Imported inside the try so an import failure is reported as a tool
+        # error like any other exception raised below.
+        from core.database import Webhook
+
+        if action == "list":
+            listing = []
+            for row in session.query(Webhook).all():
+                listing.append({
+                    "id": row.id,
+                    "name": row.name,
+                    "url": row.url,
+                    "events": row.events,
+                    "is_active": row.is_active,
+                })
+            return {"response": f"{len(listing)} webhooks", "webhooks": listing, "exit_code": 0}
+
+        if action == "add":
+            import uuid as _uuid
+            from datetime import datetime
+            from src.webhook_manager import validate_events, validate_webhook_url
+            name = params.get("name", "")
+            url = params.get("url", "")
+            events = params.get("events", "chat.completed")
+            if not url:
+                return {"error": "url is required", "exit_code": 1}
+            try:
+                url = validate_webhook_url(url)
+                events = validate_events(events)
+            except ValueError as e:
+                return {"error": str(e), "exit_code": 1}
+            new_id = str(_uuid.uuid4())[:8]
+            session.add(Webhook(
+                id=new_id,
+                name=name or url,
+                url=url,
+                events=events,
+                is_active=True,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            ))
+            session.commit()
+            return {"response": f"Added webhook '{name or url}'", "exit_code": 0}
+
+        if action in ("delete", "enable", "disable"):
+            # All three actions address one existing row by its id.
+            target_id = params.get("webhook_id", "")
+            row = session.query(Webhook).filter(Webhook.id == target_id).first()
+            if not row:
+                return {"error": f"Webhook {target_id} not found", "exit_code": 1}
+            if action == "delete":
+                # Read the name before the row is deleted.
+                removed_name = row.name
+                session.delete(row)
+                session.commit()
+                return {"response": f"Deleted webhook '{removed_name}'", "exit_code": 0}
+            row.is_active = action == "enable"
+            session.commit()
+            return {"response": f"Webhook '{row.name}' {action}d", "exit_code": 0}
+
+        return {"error": f"Unknown action: {action}", "exit_code": 1}
+    except Exception as e:
+        logger.error(f"manage_webhooks error: {e}")
+        return {"error": str(e), "exit_code": 1}
+    finally:
+        session.close()
+
+
+# ---------------------------------------------------------------------------
+# API token management tool
+# ---------------------------------------------------------------------------
+
+async def do_manage_tokens(content: str, owner: Optional[str] = None) -> Dict:
+    """Agent tool for API tokens: list, create, delete.
+
+    ``content`` is the raw tool payload, decoded by ``_parse_tool_args``; its
+    ``action`` key selects the operation and defaults to ``"list"``. ``owner``
+    is part of the shared handler signature and is not read here.
+
+    ``create`` stores only a bcrypt hash and the first 8 characters of the new
+    token, and returns the raw token once under the ``token`` key. Listings
+    expose the stored prefix followed by "...". The result always carries
+    ``exit_code`` (0 success, 1 failure) with ``response`` or ``error``.
+    """
+    from core.database import SessionLocal, ApiToken
+
+    try:
+        params = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+
+    action = params.get("action", "list")
+    session = SessionLocal()
+    try:
+        if action == "list":
+            listing = []
+            for row in session.query(ApiToken).all():
+                listing.append({
+                    "id": row.id,
+                    "name": row.name,
+                    "token_prefix": row.token_prefix + "...",
+                    "is_active": row.is_active,
+                })
+            return {"response": f"{len(listing)} API tokens", "tokens": listing, "exit_code": 0}
+
+        if action == "create":
+            # Imported here, inside the try, so a missing bcrypt surfaces as a
+            # tool error for this action only.
+            import uuid as _uuid, secrets, bcrypt
+            from datetime import datetime
+            name = params.get("name", "API Token")
+            raw_token = secrets.token_urlsafe(32)
+            hashed = bcrypt.hashpw(raw_token.encode(), bcrypt.gensalt()).decode()
+            new_id = str(_uuid.uuid4())[:8]
+            session.add(ApiToken(
+                id=new_id,
+                name=name,
+                token_hash=hashed,
+                token_prefix=raw_token[:8],
+                is_active=True,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            ))
+            session.commit()
+            return {"response": f"Created token '{name}'", "token": raw_token, "exit_code": 0}
+
+        if action == "delete":
+            target_id = params.get("token_id", "")
+            row = session.query(ApiToken).filter(ApiToken.id == target_id).first()
+            if not row:
+                return {"error": f"Token {target_id} not found", "exit_code": 1}
+            # Read the name before the row is deleted.
+            removed_name = row.name
+            session.delete(row)
+            session.commit()
+            return {"response": f"Deleted token '{removed_name}'", "exit_code": 0}
+
+        return {"error": f"Unknown action: {action}", "exit_code": 1}
+    except Exception as e:
+        logger.error(f"manage_tokens error: {e}")
+        return {"error": str(e), "exit_code": 1}
+    finally:
+        session.close()
+
+# ---------------------------------------------------------------------------
+# Settings/preferences management tool
+# ---------------------------------------------------------------------------
+
+async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
+    """Manage user settings and preferences."""
+    try:
+        args = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+
+    action = args.get("action", "list")
+
+    from core.database import SessionLocal
+    db = SessionLocal()
+    try:
+        # set/get/list/delete operate on the REAL app settings (the same store
+        # the Settings panel writes), so changing a model / voice / search
+        # engine / reminder channel from chat actually takes effect.
+        from src.settings import load_settings, save_settings, DEFAULT_SETTINGS
+
+        # Secrets/credentials the agent must NOT write: kept read-only (masked)
+        # so API keys never flow through chat. User sets these in the panel.
+        _SECRET_KEYS = {
+            "brave_api_key", "google_pse_key", "google_pse_cx",
+            "tavily_api_key", "serper_api_key", "app_public_url",
+        }
+        def _is_secret(k):
+            # `token` must be a suffix, not a substring: otherwise the int
+            # setting `agent_input_token_budget` (which even has a "token budget"
+            # alias to set it from chat) is wrongly classified as a credential.
+            return (
+                k in _SECRET_KEYS
+                or k.endswith("token")
+                or any(t in k for t in ("api_key", "_key", "secret", "password"))
+            )
+
+        # Friendly aliases → real keys, so natural phrasing resolves.
+        _ALIASES_SET = {
+            "voice": "tts_voice", "tts voice": "tts_voice", "tts": "tts_enabled",
+            "text to speech": "tts_enabled", "tts provider": "tts_provider",
+            "speech speed": "tts_speed", "voice speed": "tts_speed",
+            "stt": "stt_enabled", "speech to text": "stt_enabled", "transcription": "stt_enabled",
+            "search engine": "search_provider", "search provider": "search_provider",
+            "search results": "search_result_count", "result count": "search_result_count",
+            "default model": "default_model", "chat model": "default_model",
+            "default endpoint": "default_endpoint_id",
+            "task model": "task_model", "background model": "task_model",
+            "teacher model": "teacher_model", "teacher": "teacher_enabled",
+            "utility model": "utility_model", "research model": "research_model",
+            "research max tokens": "research_max_tokens",
+            "vision model": "vision_model", "vision": "vision_enabled",
+            "image model": "image_model", "image quality": "image_quality",
+            "image gen": "image_gen_enabled", "image generation": "image_gen_enabled",
+            "reminder channel": "reminder_channel", "reminders": "reminder_channel",
+            "ntfy topic": "reminder_ntfy_topic",
+            "webhook integration": "reminder_webhook_integration_id",
+            "webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template",
+            "agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls",
+            "agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds",
+            "token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget",
+            "hard max": "agent_input_token_hard_max",
+            "token budget cap": "agent_input_token_hard_max",
+            "input budget cap": "agent_input_token_hard_max",
+        }
+        def _resolve(k):
+            k2 = (k or "").strip().lower()
+            if k2 in DEFAULT_SETTINGS:
+                return k2
+            return _ALIASES_SET.get(k2, (k or "").strip())
+
+        _ENUMS = {
+            "image_quality": ["low", "medium", "high"],
+            "reminder_channel": ["browser", "email", "ntfy", "webhook"],
+        }
+        def _coerce(value, default):
+            if isinstance(default, bool):
+                return value if isinstance(value, bool) else str(value).strip().lower() in ("true", "on", "yes", "1", "enable", "enabled")
+            if isinstance(default, int):
+                return int(value)
+            return value
+
+        def _model_slug(value: str) -> str:
+            import re as _re
+            return _re.sub(r"[^a-z0-9]+", "", (value or "").lower())
+
+        def _endpoint_model_from_cache(model_query: str):
+            """Resolve friendly model text to an enabled endpoint + real model id.
+
+            The Settings UI stores both `<prefix>_endpoint_id` and
+            `<prefix>_model`; writing only the model leaves the runtime on the
+            old endpoint. Prefer cached model lists so this stays fast/offline.
+            """
+            import json as _json
+            import re as _re
+            from core.database import ModelEndpoint
+
+            wanted = (model_query or "").strip()
+            wanted_slug = _model_slug(wanted)
+            wanted_tokens = [_model_slug(t) for t in _re.findall(r"[A-Za-z0-9]+", wanted)]
+            wanted_tokens = [t for t in wanted_tokens if t]
+            if not wanted_slug:
+                return None
+            best = None
+            for ep in db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all():
+                raw_models = []
+                try:
+                    raw_models = _json.loads(ep.cached_models or "[]") or []
+                except Exception:
+                    raw_models = []
+                # If cache is empty, still allow matching against endpoint name
+                # for callers using model@endpoint elsewhere later.
+                for mid in raw_models:
+                    mid = str(mid)
+                    mid_slug = _model_slug(mid)
+                    if not mid_slug:
+                        continue
+                    exact = mid.lower() == wanted.lower()
+                    compact_match = wanted_slug in mid_slug or mid_slug in wanted_slug
+                    token_match = bool(wanted_tokens) and all(tok in mid_slug for tok in wanted_tokens)
+                    if exact or compact_match or token_match:
+                        score = 3 if exact else (2 if compact_match else 1)
+                        if not best or score > best[0]:
+                            best = (score, ep.id, mid)
+            if best:
+                return {"endpoint_id": best[1], "model": best[2]}
+            return None
+
+        def _mask(k, v):
+            return "••••• (set in panel)" if _is_secret(k) and v else v
+
+        if action == "list":
+            s = load_settings()
+            shown = {k: _mask(k, v) for k, v in s.items() if k in DEFAULT_SETTINGS and not isinstance(v, dict)}
+            return {"response": f"{len(shown)} settings (use get/set with a key)", "settings": shown, "exit_code": 0}
+
+        elif action == "get":
+            key = _resolve(args.get("key", ""))
+            if not key:
+                return {"error": "key is required", "exit_code": 1}
+            if key not in DEFAULT_SETTINGS:
+                return {"error": f"Unknown setting '{args.get('key')}'. Use action='list' to see them.", "exit_code": 1}
+            val = load_settings().get(key, DEFAULT_SETTINGS.get(key))
+            return {"response": f"{key} = {_mask(key, val)}", "value": _mask(key, val), "exit_code": 0}
+
+        elif action == "set":
+            raw = args.get("key", "")
+            value = args.get("value")
+            if not raw:
+                return {"error": "key is required", "exit_code": 1}
+            key = _resolve(raw)
+            if key not in DEFAULT_SETTINGS:
+                return {"error": f"Unknown setting '{raw}'. Use action='list' to see available settings.", "exit_code": 1}
+            if _is_secret(key):
+                return {"response": f"'{key}' is a credential/secret. For security I can't set it from chat. Open Settings and set it there.", "exit_code": 0}
+            # Structured settings (dicts/lists like keybinds, default_model_fallbacks)
+            # have no safe scalar coercion; _coerce would pass a bare string
+            # straight through and clobber the structure. Refuse them here; they're
+            # edited in their dedicated panels. (reset/delete still restore the
+            # default structure, which is safe.)
+            if isinstance(DEFAULT_SETTINGS[key], (dict, list)):
+                return {"response": f"'{key}' is a structured setting. Edit it in its panel, not from chat. (You can reset it to default here.)", "exit_code": 0}
+            try:
+                value = _coerce(value, DEFAULT_SETTINGS[key])
+            except (ValueError, TypeError):
+                return {"error": f"'{value}' isn't a valid value for {key} (expected {type(DEFAULT_SETTINGS[key]).__name__}).", "exit_code": 1}
+            if key in _ENUMS and str(value).lower() not in _ENUMS[key]:
+                return {"error": f"{key} must be one of: {', '.join(_ENUMS[key])}.", "exit_code": 1}
+            s = load_settings()
+            s[key] = value
+            if key in {"default_model", "research_model", "utility_model", "task_model", "vision_model", "image_model"}:
+                resolved = _endpoint_model_from_cache(str(value))
+                if resolved:
+                    prefix = key[:-6]
+                    s[f"{prefix}_endpoint_id"] = resolved["endpoint_id"]
+                    s[key] = resolved["model"]
+                    value = resolved["model"]
+            save_settings(s)
+            if key.endswith("_model") and s.get(f"{key[:-6]}_endpoint_id"):
+                return {"response": f"Set {key} = {value} (endpoint {s.get(f'{key[:-6]}_endpoint_id')}).", "exit_code": 0}
+            return {"response": f"Set {key} = {value}.", "exit_code": 0}
+
+        elif action == "delete" or action == "reset":
+            key = _resolve(args.get("key", ""))
+            if key not in DEFAULT_SETTINGS:
+                return {"error": f"Unknown setting '{args.get('key')}'.", "exit_code": 1}
+            if _is_secret(key):
+                return {"response": f"'{key}' is a credential. Reset it in the panel.", "exit_code": 0}
+            s = load_settings()
+            s[key] = DEFAULT_SETTINGS[key]
+            save_settings(s)
+            return {"response": f"Reset {key} to default ({DEFAULT_SETTINGS[key]}).", "exit_code": 0}
+
+        elif action in ("disable_tool", "enable_tool", "list_tools"):
+            # Tool-toggle actions. These edit settings.json:disabled_tools
+            # (the global list read on every chat request) rather than
+            # prefs.json. Friendly aliases accepted: "shell" -> "bash",
+            # "search" -> "web_search", "browser" -> "builtin_browser",
+            # "documents" -> the document tool set, "memory" ->
+            # manage_memory, etc.
+            from src.settings import get_setting, save_settings, load_settings
+            _ALIASES = {
+                "shell": ["bash"],
+                "terminal": ["bash"],
+                "search": ["web_search", "web_fetch"],
+                "web": ["web_search", "web_fetch"],
+                "browser": ["builtin_browser"],
+                "documents": ["create_document", "edit_document", "update_document", "suggest_document"],
+                "doc": ["create_document", "edit_document", "update_document", "suggest_document"],
+                "memory": ["manage_memory"],
+                "skills": ["manage_skills"],
+                "images": ["generate_image"],
+                "image": ["generate_image"],
+                "tasks": ["manage_tasks"],
+                "notes": ["manage_notes"],
+                "calendar": ["manage_calendar"],
+                "email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
+                "research": ["web_search", "web_fetch"],  # research is a per-request flag, not a tool (closest analog)
+            }
+
+            if action == "list_tools":
+                current = get_setting("disabled_tools", []) or []
+                return {
+                    "response": (
+                        f"Currently disabled: {', '.join(current) if current else '(none)'}.\n"
+                        "Common toggles: shell (bash), search (web_search), browser, documents, "
+                        "memory, skills, images, tasks, notes, calendar, email."
+                    ),
+                    "disabled": list(current),
+                    "exit_code": 0,
+                }
+
+            tool_name = (args.get("tool") or args.get("name") or "").strip().lower()
+            if not tool_name:
+                return {"error": "tool name required (e.g. 'shell', 'search', 'bash')", "exit_code": 1}
+            targets = _ALIASES.get(tool_name, [tool_name])
+
+            settings = load_settings()
+            current = list(settings.get("disabled_tools") or [])
+            before = set(current)
+            if action == "disable_tool":
+                for t in targets:
+                    if t not in current:
+                        current.append(t)
+            else:  # enable_tool
+                current = [t for t in current if t not in targets]
+            after = set(current)
+            settings["disabled_tools"] = current
+            save_settings(settings)
+
+            verb = "Disabled" if action == "disable_tool" else "Enabled"
+            changed = sorted(after.symmetric_difference(before))
+            return {
+                "response": (
+                    f"{verb} {tool_name} ({', '.join(targets)}). "
+                    f"Now disabled: {', '.join(current) if current else '(none)'}."
+                ),
+                "changed": changed,
+                "disabled": list(current),
+                "exit_code": 0,
+            }
+
+        else:
+            return {"error": f"Unknown action: {action}", "exit_code": 1}
+    except Exception as e:
+        logger.error(f"manage_settings error: {e}")
+        return {"error": str(e), "exit_code": 1}
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# API call tool
+# ---------------------------------------------------------------------------
+
+
+
+# ── registry adapters ────────────────────────────────────────────────────────
+def _owner_adapter(fn):
+    """Adapt an owner-scoped coroutine to the registry handler signature.
+
+    ``fn`` is awaited as ``fn(content, owner)``. The returned coroutine
+    function takes ``(content, ctx)`` and reads the owner from
+    ``ctx["owner"]`` (``None`` when the key is absent).
+    """
+    async def _execute(content: str, ctx: dict) -> dict:
+        owner = ctx.get("owner")
+        outcome = await fn(content, owner)
+        return outcome
+    return _execute
+
+
+# Registry handlers for the admin tools: each do_manage_* implementation is
+# wrapped by _owner_adapter so it is called as execute(content, ctx).
+ADMIN_TOOL_HANDLERS = {
+    "manage_endpoints": _owner_adapter(do_manage_endpoints),
+    "manage_mcp": _owner_adapter(do_manage_mcp),
+    "manage_webhooks": _owner_adapter(do_manage_webhooks),
+    "manage_tokens": _owner_adapter(do_manage_tokens),
+    "manage_settings": _owner_adapter(do_manage_settings),
+}
@@ -0,0 +1,98 @@
+"""Agent tool to inspect and control detached background `bash` jobs.
+
+`bash` blocks prefixed with a `#!bg` marker run detached via `src.bg_jobs`; the
+agent is auto-re-invoked with the output when they finish. This tool covers the
+gaps in that flow: list the jobs in the current chat, read a still-running job's
+output on demand, and kill a runaway job instead of waiting out its max-runtime.
+
+Registry tool (`TOOL_HANDLERS["manage_bg_jobs"]`). Jobs are scoped to the chat
+that launched them, so every action requires the caller's `session_id` and a job
+from another session is treated as not found.
+"""
+
+import json
+import time
+from typing import Any, Dict, List
+
+# Accepted spellings for each kind of action. The tool lower-cases the
+# requested action before looking it up in these sets.
+_LIST_ACTIONS = {"list", "ls", "jobs"}
+_OUTPUT_ACTIONS = {"output", "get", "read", "tail", "status", "show"}
+_KILL_ACTIONS = {"kill", "stop", "cancel", "terminate"}
+
+
+def _age(rec: Dict[str, Any]) -> str:
+    """Format the time elapsed since ``rec["started_at"]`` as a short label.
+
+    Returns ``"?"`` when the record has no (or a falsy) start time, otherwise
+    ``"<n>s"`` under a minute, ``"<n>m"`` under an hour, and ``"<h>h<m>m"``
+    beyond that. ``started_at`` is subtracted from ``time.time()``.
+    """
+    started = rec.get("started_at")
+    if not started:
+        return "?"
+    elapsed = int(time.time() - started)
+    if elapsed < 60:
+        return f"{elapsed}s"
+    if elapsed < 3600:
+        return f"{elapsed // 60}m"
+    hours, leftover = divmod(elapsed, 3600)
+    return f"{hours}h{leftover // 60}m"
+
+
+def _status_label(rec: Dict[str, Any]) -> str:
+    """Return a human-readable status for a job record.
+
+    The ``killed``, ``timed_out`` and ``died`` flags take precedence, in that
+    order, over the ``status`` field. A ``done`` or ``failed`` status is
+    reported together with the record's ``exit_code``; any other status is
+    returned as-is (``"?"`` when the record has none).
+    """
+    flag_labels = (("killed", "killed"), ("timed_out", "timed out"), ("died", "died"))
+    for flag, label in flag_labels:
+        if rec.get(flag):
+            return label
+    state = rec.get("status", "?")
+    if state in ("done", "failed"):
+        return f"{state} (exit {rec.get('exit_code')})"
+    return state
+
+
+def _row(rec: Dict[str, Any]) -> str:
+    """Render one job record as a single listing line.
+
+    The line has the form ``[id] status | age | command``, where the command
+    is the first line of ``rec["command"]`` cut to 80 characters. A record
+    with a missing, empty or whitespace-only command yields an empty command
+    column instead of raising.
+    """
+    # splitlines() on an empty string returns [], so guard before indexing.
+    cmd_lines = (rec.get("command") or "").strip().splitlines()
+    cmd = cmd_lines[0][:80] if cmd_lines else ""
+    return f"[{rec.get('id')}] {_status_label(rec)} | {_age(rec)} | {cmd}"
+
+
+class ManageBgJobsTool:
+    """Registry handler for `manage_bg_jobs`: list, read or kill background jobs."""
+
+    async def execute(self, content: str, ctx: dict) -> dict:
+        """Run one action against the calling chat's background jobs.
+
+        ``content`` is a JSON object with ``action`` (default ``list``) and,
+        for output/kill actions, ``job_id`` (or ``id``). Anything that is not
+        a JSON object is treated as an empty argument set. ``ctx`` must carry
+        the caller's ``session_id``; a job whose ``session_id`` differs is
+        reported as not found.
+
+        Returns a dict with ``exit_code`` plus either ``output`` or ``error``.
+        """
+        from src import bg_jobs
+
+        session_id = ctx.get("session_id")
+        raw = (content or "").strip()
+        try:
+            args = json.loads(raw) if raw else {}
+        except (ValueError, TypeError):
+            args = {}
+        if not isinstance(args, dict):
+            args = {}
+        # A null/empty action falls back to "list" rather than becoming the
+        # unknown action "none" / "".
+        action = str(args.get("action") or "list").strip().lower()
+        job_id = str(args.get("job_id") or args.get("id") or "").strip()
+
+        if not session_id:
+            return {"error": "manage_bg_jobs: no active chat session; background jobs are scoped to a chat.", "exit_code": 1}
+
+        if action in _LIST_ACTIONS:
+            jobs: List[Dict[str, Any]] = bg_jobs.list_for_session(session_id)
+            if not jobs:
+                return {"output": "No background jobs in this chat.", "exit_code": 0}
+            # Newest first; records without a start time sort last.
+            jobs.sort(key=lambda r: r.get("started_at") or 0, reverse=True)
+            lines = "\n".join(_row(r) for r in jobs)
+            return {"output": f"{len(jobs)} background job(s):\n{lines}", "exit_code": 0}
+
+        if action in _OUTPUT_ACTIONS or action in _KILL_ACTIONS:
+            if not job_id:
+                return {"error": f"manage_bg_jobs: action '{action}' requires a job_id (see action='list').", "exit_code": 1}
+            rec = bg_jobs.get(job_id)
+            # Scope: only the chat that launched a job may see or control it.
+            if rec is None or rec.get("session_id") != session_id:
+                return {"error": f"manage_bg_jobs: no background job '{job_id}' in this chat.", "exit_code": 1}
+
+            if action in _KILL_ACTIONS:
+                if rec.get("status") != "running":
+                    return {"output": f"Job `{job_id}` already {_status_label(rec)}; nothing to kill.", "exit_code": 0}
+                killed = bg_jobs.kill(job_id)
+                # NOTE(review): kill()'s return shape is not visible here; it is
+                # presumably the updated record. Tolerate None / non-dict /
+                # empty command so the confirmation never raises after the
+                # kill has already been issued.
+                killed_rec = killed if isinstance(killed, dict) else {}
+                command = killed_rec.get("command") or rec.get("command") or ""
+                cmd_lines = str(command).strip().splitlines()
+                summary = cmd_lines[0][:80] if cmd_lines else ""
+                return {"output": f"Killed background job `{job_id}` ({summary}).", "exit_code": 0}
+
+            out = rec.get("output") or "(no output yet)"
+            return {
+                "output": f"Job `{job_id}` [{_status_label(rec)}, {_age(rec)}]\nCommand: {rec.get('command')}\n\nOutput:\n{out}",
+                "exit_code": 0,
+            }
+
+        return {"error": f"manage_bg_jobs: unknown action '{action}'. Use list, output, or kill.", "exit_code": 1}
@@ -1,8 +1,8 @@
 from typing import Any, Dict, List, Optional
 import logging
 import re
-import json
 from src.constants import MAX_READ_CHARS
+from src.tool_utils import _parse_tool_args

 logger = logging.getLogger(__name__)

@@ -154,38 +154,6 @@ def _coerce_email_document_content(existing: str, incoming: str) -> str:
        body = new
    return header.rstrip() + "\n---\n" + body

-def _parse_tool_args(content):
-    """Parse a tool-call argument blob.
-
-    Accepts either a JSON string or an already-decoded dict. Unwraps the
-    common `{"body": {...}}` envelope that smaller models emit when they
-    read tool descriptions like "Body is JSON: {...}" literally — they
-    pass `body` as a field name rather than treating it as a noun.
-
-    Returns a dict on success, raises ValueError on bad JSON.
-    """
-    if isinstance(content, str):
-        try:
-            args = json.loads(content) if content.strip() else {}
-        except (json.JSONDecodeError, TypeError) as e:
-            raise ValueError(str(e))
-    elif isinstance(content, dict):
-        args = content
-    else:
-        args = {}
-    # Unwrap {"body": {...}} envelope — but only if `body` is the sole key
-    # and points at a dict. We don't want to clobber a legitimate `body`
-    # field on tools where it's a real arg (e.g. send_email body text).
-    if (
-        isinstance(args, dict)
-        and len(args) == 1
-        and "body" in args
-        and isinstance(args["body"], dict)
-        and "action" in args["body"]  # extra safety: only unwrap if the inner dict looks like a tool call
-    ):
-        args = args["body"]
-    return args
-
 def parse_edit_blocks(content: str) -> list:
    """Parse <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks."""
    edits = []
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import os
+import re
 import difflib
 import fnmatch
 import shutil
@@ -16,6 +17,31 @@ _CODENAV_SKIP_DIRS = frozenset({
 _CODENAV_MAX_HITS = 200
 _CODENAV_MAX_LINE = 400

+
+def _glob_to_regex(pat: str) -> "re.Pattern":
+    """Translate a forward-slash glob (**, *, ?, [seq]) into a compiled regex.
+    `**/` matches zero or more complete directories.
+    `**` anywhere else matches any run of characters, including /.
+    `*` matches within a single path segment (does not cross /).
+    `?` matches exactly one character other than /.
+    `[seq]` / `[!seq]` match one character in / not in seq, never /.
+    An unterminated or invalid `[` is matched as a literal bracket.
+    """
+    i, n, out = 0, len(pat), []
+    while i < n:
+        if pat[i : i + 3] == "**/":
+            out.append("(?:[^/]+/)*")
+            i += 3
+        elif pat[i : i + 2] == "**":
+            out.append(".*")
+            i += 2
+        elif pat[i] == "*":
+            out.append("[^/]*")
+            i += 1
+        elif pat[i] == "?":
+            out.append("[^/]")
+            i += 1
+        elif pat[i] == "[":
+            # Locate the closing bracket. As in fnmatch, a leading "!" and a
+            # "]" directly after the opening belong to the set itself.
+            j = i + 1
+            if j < n and pat[j] == "!":
+                j += 1
+            if j < n and pat[j] == "]":
+                j += 1
+            while j < n and pat[j] != "]":
+                j += 1
+            char_class = None
+            if j < n:
+                members = pat[i + 1 : j]
+                negate = members.startswith("!")
+                if negate:
+                    members = members[1:]
+                members = members.replace("\\", "\\\\").replace("[", "\\[")
+                if members.startswith("^"):
+                    # A glob "^" is literal; keep regex from reading it as negation.
+                    members = "\\" + members
+                # The lookahead keeps the class inside one path segment.
+                char_class = "(?!/)[" + ("^" if negate else "") + members + "]"
+                try:
+                    re.compile(char_class)
+                except re.error:
+                    char_class = None  # e.g. a reversed range such as [z-a]
+            if char_class is None:
+                out.append(re.escape("["))
+                i += 1
+            else:
+                out.append(char_class)
+                i = j + 1
+        else:
+            out.append(re.escape(pat[i]))
+            i += 1
+    return re.compile("".join(out))
+
 def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
    if old == new:
        return None
@@ -259,23 +285,38 @@ class GlobTool:
            return {"error": f"glob: {e}", "exit_code": 1}

        def _glob():
-            from pathlib import Path
-            base = Path(root)
-            if not base.is_dir():
+            base = os.path.abspath(root)
+            if not os.path.isdir(base):
                return None, f"glob: {root}: not a directory"
+            norm_pat = pattern.replace("\\", "/")
+            # Fast path: literal pattern (no wildcards) → direct path lookup.
+            if not any(c in norm_pat for c in "*?["):
+                cand = os.path.normpath(os.path.join(base, norm_pat))
+                if os.path.exists(cand):
+                    return [cand], None
+                # Literal not at exact path — fall through to walk so
+                # e.g. "foo.py" still matches at any depth (like rglob).
+            # Compile glob to regex: * stays within one segment, **/ spans dirs.
+            regex = _glob_to_regex(norm_pat)
            matched = []
+            cap = _CODENAV_MAX_HITS * 5
            try:
-                for p in base.rglob(pattern):
-                    if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
-                        continue
-                    try:
-                        mtime = p.stat().st_mtime
-                    except OSError:
-                        mtime = 0
-                    matched.append((mtime, str(p)))
-                    if len(matched) > _CODENAV_MAX_HITS * 5:
+                for dp, dns, fns in os.walk(base):
+                    # Prune skipped dirs before descending (unlike rglob which
+                    # descends first then filters — fatal on large node_modules).
+                    dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
+                    for name in fns + dns:
+                        full = os.path.join(dp, name)
+                        rel = os.path.relpath(full, base).replace(os.sep, "/")
+                        if regex.fullmatch(rel) or regex.fullmatch(name):
+                            try:
+                                mtime = os.stat(full).st_mtime
+                            except OSError:
+                                mtime = 0
+                            matched.append((mtime, full))
+                    if len(matched) > cap:
                        break
-            except (OSError, ValueError) as _e:
+            except OSError as _e:
                return None, f"glob: {_e}"
            matched.sort(key=lambda t: t[0], reverse=True)
            return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
@@ -0,0 +1,208 @@
+"""model_interaction_tools.py - agent tools for talking to other models.
+
+Owns the model-interaction tool implementations (chat_with_model, ask_teacher,
+list_models) and their handler classes, registered in ``TOOL_HANDLERS``. Part
+of the tool -> registry migration (#3629): the implementations were moved here
+out of ``src.ai_interaction`` so dispatch flows through the registry instead of
+the elif chain / dispatch_ai_tool in tool_execution.py.
+
+Shared helpers that still live in ``src.ai_interaction`` and are used by tools
+not yet migrated (``_resolve_model``, ``AI_CHAT_TIMEOUT``) are imported lazily
+inside the functions to avoid an import cycle at module load.
+"""
+import logging
+from typing import Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# System message sent ahead of the problem text in every ask_teacher call.
+_TEACHER_SYSTEM_PROMPT = (
+    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
+    "Provide clear, actionable guidance:\n"
+    "1. Brief analysis of the problem\n"
+    "2. Recommended approach (step by step)\n"
+    "3. Key things to watch out for\n\n"
+    "Be concise and practical. No preamble."
+)
+
+
+async def chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Relay a single user message to the named model and return its reply.
+
+    Content format:
+      Line 1: model_name (or model_name@endpoint_name)
+      Line 2+: the message to send
+
+    Returns ``{"model", "response"}`` on success (the response is cut to
+    10000 characters) or ``{"error": ...}`` when the input is malformed, the
+    model cannot be resolved, or the call fails.
+    """
+    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
+    from src.llm_core import llm_call_async
+
+    first_line, _, remainder = content.strip().partition("\n")
+    model_spec = first_line.strip()
+    if not model_spec:
+        return {"error": "First line must be the model name"}
+
+    message = remainder.strip()
+    if not message:
+        return {"error": "No message provided (line 2+ is the message)"}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    max_chars = 10000
+    try:
+        reply = await llm_call_async(
+            url,
+            model,
+            [{"role": "user", "content": message}],
+            headers=headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+        # Keep the tool output bounded.
+        if len(reply) > max_chars:
+            reply = reply[:max_chars] + "\n... (truncated)"
+        return {"model": model, "response": reply}
+    except Exception as e:
+        logger.error(f"chat_with_model failed: {e}")
+        return {"error": f"Failed to get response from {model_spec}: {e}"}
+
+
+async def ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Escalate a problem to a stronger model and return its guidance.
+
+    Content format:
+      Line 1: model_name (or 'auto')
+      Line 2+: the problem description
+
+    With 'auto' (or an empty first line) the model comes from the
+    ``teacher_model`` setting. Returns ``{"model", "response", "teacher"}``
+    on success (the response is cut to 8000 characters) or ``{"error": ...}``.
+    """
+    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
+    from src.llm_core import llm_call_async
+    from src.settings import get_setting
+
+    first_line, _, remainder = content.strip().partition("\n")
+    model_spec = first_line.strip()
+    problem = remainder.strip()
+
+    if not problem:
+        return {"error": "No problem description provided"}
+
+    if not model_spec or model_spec.lower() == "auto":
+        model_spec = get_setting("teacher_model", "")
+        if not model_spec:
+            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    conversation = [
+        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
+        {"role": "user", "content": f"Problem:\n{problem}"},
+    ]
+    max_chars = 8000
+    try:
+        advice = await llm_call_async(
+            url,
+            model,
+            conversation,
+            headers=headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+        if len(advice) > max_chars:
+            advice = advice[:max_chars] + "\n... (truncated)"
+        return {"model": model, "response": advice, "teacher": True}
+    except Exception as e:
+        logger.error(f"ask_teacher failed: {e}")
+        return {"error": f"Teacher call failed ({model_spec}): {e}"}
+
+
+async def list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """List the models offered by every enabled endpoint.
+
+    Content = optional filter keyword. A model is kept when the keyword occurs
+    (case-insensitively) in its id or in the endpoint's name.
+
+    Returns ``{"results": <markdown text>}`` on success, or ``{"error": ...}``
+    when the lookup as a whole fails. An endpoint whose model listing cannot
+    be fetched is shown with the placeholder ``(endpoint offline)``; an
+    endpoint whose runtime config cannot be resolved is skipped.
+    """
+    import json
+    import httpx
+    from src.database import SessionLocal, ModelEndpoint
+    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
+    from src.auth_helpers import owner_filter
+    from src.endpoint_resolver import resolve_endpoint_runtime, build_headers, build_models_url
+
+    keyword = (content or "").strip().lower() or None
+
+    db = SessionLocal()
+    try:
+        # SQLAlchemy column comparison: this must stay `==`, not `is`.
+        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+        if owner:
+            query = owner_filter(query, ModelEndpoint, owner)
+        endpoints = query.all()
+        if not endpoints:
+            return {"results": "No enabled model endpoints configured."}
+
+        result_lines = []
+        total_models = 0
+
+        for ep in endpoints:
+            try:
+                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
+            except Exception:
+                continue
+            provider = _detect_provider(base)
+            headers = build_headers(api_key, base)
+
+            model_ids = []
+            if provider == "anthropic":
+                model_ids = list(ANTHROPIC_MODELS)
+            else:
+                try:
+                    models_url = build_models_url(base)
+                    if models_url:
+                        # Async client: a blocking httpx.get() here would stall
+                        # the event loop for up to the timeout per endpoint.
+                        async with httpx.AsyncClient(timeout=5) as client:
+                            r = await client.get(models_url, headers=headers)
+                        r.raise_for_status()
+                        data = r.json()
+                        # OpenAI-style payload: {"data": [{"id": ...}, ...]}
+                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+                        if not model_ids:
+                            # Fallback payload shape: {"models": [{"name"|"model": ...}]}
+                            model_ids = [
+                                m.get("name") or m.get("model")
+                                for m in (data.get("models") or [])
+                                if m.get("name") or m.get("model")
+                            ]
+                    else:
+                        model_ids = json.loads(ep.cached_models or "[]")
+                except Exception:
+                    model_ids = ["(endpoint offline)"]
+
+            if keyword:
+                ep_name = (ep.name or "").lower()
+                # str() guards against non-string ids in cached/remote listings.
+                model_ids = [m for m in model_ids if keyword in str(m).lower() or keyword in ep_name]
+
+            if model_ids:
+                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
+                for mid in model_ids:
+                    result_lines.append(f"  - `{mid}`")
+                    total_models += 1
+
+        if not result_lines:
+            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
+
+        header = f"Available models ({total_models} total):"
+        return {"results": header + "\n".join(result_lines)}
+    except Exception as e:
+        logger.error(f"list_models failed: {e}")
+        return {"error": str(e)}
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# Handler classes registered in TOOL_HANDLERS
+# ---------------------------------------------------------------------------
+
+class ChatWithModelTool:
+    """Registry handler that forwards to ``chat_with_model``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Call ``chat_with_model`` with the session id and owner from ``ctx``."""
+        caller_session = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await chat_with_model(content, caller_session, owner=caller)
+
+
+class AskTeacherTool:
+    """Registry handler that forwards to ``ask_teacher``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Call ``ask_teacher`` with the session id and owner from ``ctx``."""
+        caller_session = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await ask_teacher(content, caller_session, owner=caller)
+
+
+class ListModelsTool:
+    """Registry handler that forwards to ``list_models``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Call ``list_models`` with the session id and owner from ``ctx``."""
+        caller_session = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await list_models(content, caller_session, owner=caller)
@@ -0,0 +1,464 @@
+"""session_tools.py - agent tools for AI-to-AI session management.
+
+Owns create_session, list_sessions, send_to_session and manage_session, moved
+out of src.ai_interaction as part of the tool -> registry migration (#3629), and
+their handler classes registered in TOOL_HANDLERS.
+
+The session manager is a runtime-set singleton in src.ai_interaction, so each
+function fetches it via get_session_manager() (imported here); _resolve_model and
+AI_CHAT_TIMEOUT are reused from there too.
+"""
+import json
+import logging
+import uuid
+from typing import Dict, Optional
+
+from src.ai_interaction import get_session_manager, _resolve_model, AI_CHAT_TIMEOUT
+
+logger = logging.getLogger(__name__)
+
+
+async def create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Create a new chat session bound to a resolved model.
+
+    Content format:
+      Line 1: session name
+      Line 2: model_name (or model_name@endpoint_name)
+
+    Returns ``{"session_id", "name", "model", "endpoint_url"}`` on success or
+    ``{"error": ...}`` when the manager is unavailable, the input is
+    malformed, the model cannot be resolved, or creation fails.
+    """
+    manager = get_session_manager()
+    if not manager:
+        return {"error": "Session manager not available"}
+
+    parts = content.strip().split("\n")
+    if len(parts) < 2:
+        return {"error": "Need 2 lines: session name, then model spec"}
+
+    name, model_spec = (part.strip() for part in parts[:2])
+    if not name:
+        return {"error": "Session name cannot be empty"}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    # Short id: the first 8 hex digits of a random UUID.
+    sid = uuid.uuid4().hex[:8]
+    try:
+        manager.create_session(
+            session_id=sid,
+            name=name,
+            endpoint_url=url,
+            model=model,
+            rag=False,
+            owner=owner,
+        )
+        # Keep the resolved headers on the session so later calls can reuse them.
+        created = manager.get_session(sid)
+        if created and headers:
+            created.headers = headers
+        # Event dispatch is best-effort; a failure must not fail the creation.
+        try:
+            from src.event_bus import fire_event
+            fire_event("session_created", owner)
+        except Exception:
+            logger.debug("session_created event dispatch failed", exc_info=True)
+
+        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
+    except Exception as e:
+        logger.error(f"create_session failed: {e}")
+        return {"error": f"Failed to create session: {e}"}
+
+async def list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """List sessions sorted by most-recently-active first.
+
+    Output includes a relative "last active" timestamp per row so the
+    agent can answer "open my last chat" without guessing from titles.
+    The most-recent session is always first in the list; sessions without a
+    usable timestamp sort last. At most 50 rows are rendered.
+
+    Content = optional filter keyword (matches session name).
+
+    Returns ``{"results": <markdown text>}`` or ``{"error": ...}``.
+    """
+    _session_manager = get_session_manager()
+    if not _session_manager:
+        return {"error": "Session manager not available"}
+
+    keyword = (content or "").strip().lower() or None
+
+    try:
+        from core.database import SessionLocal, Session as DbSession
+        from datetime import datetime, timezone
+
+        # Pull every session's last_accessed from the DB so we can sort
+        # by recency. In-memory sessions hold name + model + msg_count;
+        # the DB row holds the timestamps.
+        db = SessionLocal()
+        try:
+            db_rows = {r.id: r for r in db.query(DbSession).all()}
+        finally:
+            db.close()
+
+        def _as_utc(ts):
+            # Naive timestamps are treated as UTC so naive and aware values
+            # can be compared and subtracted without a TypeError.
+            return ts.replace(tzinfo=timezone.utc) if ts.tzinfo is None else ts
+
+        def _sort_key(row):
+            # Epoch seconds; missing or unusable timestamps sink to the bottom.
+            ts = row[0]
+            if not ts:
+                return float("-inf")
+            try:
+                return _as_utc(ts).timestamp()
+            except (AttributeError, TypeError, ValueError, OverflowError, OSError):
+                return float("-inf")
+
+        # SECURITY: scope to the caller's sessions. Passing None returned
+        # every user's sessions, which the agent tool then exposed via the
+        # "list my chats" reply.
+        sessions = _session_manager.get_sessions_for_user(owner)
+        rows = []
+        for sid, sess in sessions.items():
+            if keyword and keyword not in (sess.name or "").lower():
+                continue
+            db_row = db_rows.get(sid)
+            # Prefer last_accessed; fall back to updated_at, then created_at.
+            ts = None
+            if db_row:
+                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
+            rows.append((ts, sid, sess))
+
+        # Sort by timestamp DESC. A numeric key avoids comparing naive and
+        # aware datetimes, which would raise and fail the whole listing.
+        rows.sort(key=_sort_key, reverse=True)
+
+        def _rel(ts):
+            if not ts:
+                return 'never'
+            try:
+                diff = (datetime.now(timezone.utc) - _as_utc(ts)).total_seconds()
+            except Exception:
+                return 'unknown'
+            if diff < 60: return 'just now'
+            if diff < 3600: return f'{int(diff / 60)}m ago'
+            if diff < 86400: return f'{int(diff / 3600)}h ago'
+            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
+            return ts.strftime('%Y-%m-%d')
+
+        lines = []
+        for i, (ts, sid, sess) in enumerate(rows):
+            if i >= 50:
+                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
+                break
+            # Escape brackets so the name cannot break the markdown link.
+            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
+            msg_count = getattr(sess, "message_count", 0) or 0
+            model = getattr(sess, "model", "unknown")
+            marker = " ← most recent" if i == 0 else ""
+            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
+
+        if not lines:
+            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
+
+        return {
+            "results": (
+                f"Found {len(rows)} session(s), sorted most-recent first:\n"
+                + "\n".join(lines)
+                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
+            )
+        }
+    except Exception as e:
+        logger.error(f"list_sessions failed: {e}")
+        return {"error": str(e)}
+
+async def send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Post a message into an existing session and return the model's reply.
+
+    Content format:
+      Line 1: session_id
+      Line 2+: message
+
+    The user message and the reply are both appended to the target session.
+    Returns ``{"session_id", "session_name", "response"}`` (the response is
+    cut to 10000 characters in the tool output only) or ``{"error": ...}``.
+    """
+    _session_manager = get_session_manager()
+    from src.llm_core import llm_call_async
+    from core.models import ChatMessage
+
+    if not _session_manager:
+        return {"error": "Session manager not available"}
+
+    sid_line, newline, body = content.strip().partition("\n")
+    if not newline:
+        return {"error": "Need 2 lines: session_id, then message"}
+
+    target_sid = sid_line.strip()
+    message = body.strip()
+
+    sess = _session_manager.get_session(target_sid)
+    if not sess:
+        return {"error": f"Session '{target_sid}' not found"}
+
+    # Owner-scope: another user's session is reported as missing.
+    if owner:
+        session_owner = getattr(sess, "owner", None)
+        if session_owner and sess.owner != owner:
+            return {"error": f"Session '{target_sid}' not found"}
+
+    if not message:
+        return {"error": "No message provided"}
+
+    max_chars = 10000
+    try:
+        # Prior history plus the new user turn.
+        conversation = sess.get_context_messages()
+        conversation.append({"role": "user", "content": message})
+
+        reply = await llm_call_async(
+            sess.endpoint_url,
+            sess.model,
+            conversation,
+            headers=sess.headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+
+        # Persist the exchange before shortening the reply for tool output.
+        sess.add_message(ChatMessage("user", message))
+        sess.add_message(ChatMessage("assistant", reply))
+
+        if len(reply) > max_chars:
+            reply = reply[:max_chars] + "\n... (truncated)"
+
+        return {
+            "session_id": target_sid,
+            "session_name": sess.name,
+            "response": reply,
+        }
+    except Exception as e:
+        logger.error(f"send_to_session failed: {e}")
+        return {"error": f"Failed to send to session: {e}"}
+
+async def manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Manage a chat session: list, link to, rename, archive, star, delete, truncate or fork.
+
+    ``content`` is accepted in two shapes:
+
+    * JSON object: ``{"action": ..., "session_id": ..., "value": ...}``.
+      ``session`` / ``id`` are accepted as aliases for ``session_id``;
+      ``name`` / ``new_name`` / ``title`` / ``keep_count`` are used as the
+      value when ``value`` is absent; ``filter`` is only used by ``list``.
+    * Legacy line format:
+        Line 1: action
+        Line 2: target session_id (or "current" to use the active session)
+        Line 3: action-specific param (new name for rename, keep_count for
+                truncate/fork). For ``list``, everything after line 1 is the
+                filter keyword.
+
+    Actions: list, switch/open/select/view, rename, archive, unarchive,
+    delete, important, unimportant, truncate, fork.
+
+    Args:
+        content: Tool payload in one of the two formats above.
+        session_id: Id of the session the caller is chatting in. Used to
+            resolve "current" and to refuse deleting the active session.
+        owner: When not None, database lookups are restricted to sessions
+            whose owner column equals this value.
+
+    Returns:
+        A dict describing the outcome (``action``, ``session_id``,
+        ``results``, ...) on success, or ``{"error": "<message>"}`` when the
+        request is invalid or the operation fails.
+    """
+    _session_manager = get_session_manager()
+    if not _session_manager:
+        return {"error": "Session manager not available"}
+
+    from src.database import SessionLocal, Session as DbSession
+
+    # Accept BOTH the structured JSON args the tool schema advertises
+    # ({action, session_id, value}) AND the legacy line-based format
+    # (line1=action, line2=session_id, line3=value). Native function-calling
+    # models send JSON; fenced-block callers send lines. Previously only the
+    # line format was parsed, so a model that followed the schema (JSON) got
+    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
+    _raw = (content or "").strip()
+    action = ""
+    target_sid = ""
+    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
+    _list_filter = ""
+    _parsed = None
+    if _raw.startswith("{"):
+        try:
+            _parsed = json.loads(_raw)
+        except Exception:
+            # Not valid JSON after all: fall back to the line-based parser.
+            _parsed = None
+    if isinstance(_parsed, dict):
+        action = str(_parsed.get("action") or "").strip().lower()
+        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
+        _v = _parsed.get("value")
+        if _v is None:
+            _v = (_parsed.get("name") or _parsed.get("new_name")
+                  or _parsed.get("title") or _parsed.get("keep_count"))
+        value = None if _v is None else str(_v).strip()
+        _list_filter = str(_parsed.get("filter") or "").strip()
+    else:
+        lines = _raw.split("\n")
+        if not lines or not lines[0].strip():
+            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
+        action = lines[0].strip().lower()
+        target_sid = lines[1].strip() if len(lines) >= 2 else ""
+        value = lines[2].strip() if len(lines) >= 3 else None
+        _list_filter = "\n".join(lines[1:]).strip()
+
+    if not action:
+        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
+
+    # `list` alias - dispatch to list_sessions so the agent's natural
+    # first guess (every other manage_* tool has a `list` action) works.
+    if action == "list":
+        return await list_sessions(_list_filter, session_id, owner=owner)
+
+    if not target_sid:
+        return {"error": "Need a session_id (or 'current' for the active chat)"}
+
+    # Allow "current" to refer to the active session. Without an active
+    # session id there is nothing to resolve it to, so fail explicitly
+    # instead of looking up a session whose id is literally "current".
+    if target_sid.lower() == "current":
+        if not session_id:
+            return {"error": "No active session to resolve 'current'. Pass an explicit session_id (use list_sessions to find it)."}
+        target_sid = session_id
+
+    def _session_query(db):
+        # Query for the target session row, restricted to the caller's
+        # sessions whenever an owner was supplied.
+        query = db.query(DbSession).filter(DbSession.id == target_sid)
+        if owner is not None:
+            query = query.filter(DbSession.owner == owner)
+        return query
+
+    # `switch` / `open` / `select` / `view` - the agent reaches for
+    # these when the user asks to "open" or "switch to" a session.
+    # There's no server-side way to make the browser navigate, so we
+    # just return a clickable anchor link the user can click. The
+    # frontend's chat-history click delegate routes `#session-<id>`
+    # to selectSession(). The agent's reply naturally embeds this
+    # result so the user sees a single clickable line.
+    if action in ("switch", "open", "select", "view"):
+        db = SessionLocal()
+        try:
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            name = db_sess.name or target_sid
+        finally:
+            db.close()
+        return {
+            "action": action,
+            "session_id": target_sid,
+            "name": name,
+            "results": f"[{name}](#session-{target_sid}) - click to open.",
+        }
+
+    db = SessionLocal()
+    try:
+        if action == "rename":
+            if not value:
+                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
+            new_name = value
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.name = new_name
+            db.commit()
+            # Keep the session manager's copy of the name in step with the DB row.
+            _session_manager.update_session_name(target_sid, new_name)
+            return {"action": "rename", "session_id": target_sid, "name": new_name,
+                    "results": f"Session renamed to '{new_name}'"}
+
+        elif action == "archive":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.archived = True
+            db.commit()
+            return {"action": "archive", "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' archived"}
+
+        elif action == "unarchive":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.archived = False
+            db.commit()
+            return {"action": "unarchive", "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' unarchived"}
+
+        elif action == "delete":
+            # Refuse before touching the DB: the caller is chatting in this session.
+            if target_sid == session_id:
+                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
+            if db_sess.is_important:
+                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
+            try:
+                ok = _session_manager.delete_session(target_sid)
+                if not ok:
+                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
+                return {"action": "delete", "session_id": target_sid,
+                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
+            except Exception as e:
+                return {"error": f"Failed to delete session: {e}"}
+
+        elif action in ("important", "unimportant"):
+            is_important = action == "important"
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            # Prevent AI from unstarring sessions - only the user can do that manually
+            if not is_important and db_sess.is_important:
+                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
+            db_sess.is_important = is_important
+            db.commit()
+            status = "marked as important" if is_important else "unmarked as important"
+            return {"action": action, "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' {status}"}
+
+        elif action == "truncate":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            keep_count = 10  # default when no count was supplied
+            if value:
+                # Truncation discards messages, so a count that cannot be
+                # understood is rejected rather than silently replaced by
+                # the default.
+                try:
+                    keep_count = int(value)
+                except ValueError:
+                    return {"error": f"truncate needs an integer keep_count, got '{value}'"}
+                if keep_count < 0:
+                    return {"error": f"truncate keep_count must be 0 or greater, got {keep_count}"}
+            success = _session_manager.truncate_messages(target_sid, keep_count)
+            if success:
+                return {"action": "truncate", "session_id": target_sid,
+                        "results": f"Session truncated to last {keep_count} messages"}
+            return {"error": f"Failed to truncate session '{target_sid}'"}
+
+        elif action == "fork":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            keep_count = 0  # 0 = all messages
+            if value:
+                try:
+                    keep_count = int(value)
+                except ValueError:
+                    # Forking loses nothing, so an unparseable count just
+                    # falls back to copying the whole history.
+                    pass
+
+            source = _session_manager.get_session(target_sid)
+            if not source:
+                return {"error": f"Session '{target_sid}' not found"}
+
+            new_sid = str(uuid.uuid4())[:8]
+            _session_manager.create_session(
+                session_id=new_sid,
+                name=f"Fork: {source.name}",
+                endpoint_url=source.endpoint_url,
+                model=source.model,
+                rag=False,
+                owner=owner,
+            )
+            # Copy messages; a positive keep_count keeps only the first
+            # keep_count entries of the source history.
+            history = source.get_context_messages()
+            if keep_count > 0:
+                history = history[:keep_count]
+            from core.models import ChatMessage as InMemoryMsg
+            new_sess = _session_manager.get_session(new_sid)
+            # Carry over any request headers stored on the source session so
+            # later calls against the fork reach the same endpoint with the
+            # same headers.
+            source_headers = getattr(source, "headers", None)
+            if new_sess is not None and source_headers:
+                new_sess.headers = dict(source_headers)
+            for msg in history:
+                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
+            try:
+                from src.event_bus import fire_event
+                fire_event("session_created", owner)
+            except Exception:
+                # Event dispatch is best-effort; the fork itself already succeeded.
+                logger.debug("session_created event dispatch failed", exc_info=True)
+
+            return {"action": "fork", "session_id": new_sid,
+                    "source_session": target_sid, "messages_copied": len(history),
+                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
+
+        else:
+            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
+    except Exception as e:
+        logger.error(f"manage_session failed: {e}")
+        return {"error": str(e)}
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# Handler classes registered in TOOL_HANDLERS
+# ---------------------------------------------------------------------------
+
+class CreateSessionTool:
+    """Tool handler that forwards calls to ``create_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``create_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await create_session(content, active_sid, owner=caller)
+
+
+class ListSessionsTool:
+    """Tool handler that forwards calls to ``list_sessions``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``list_sessions`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await list_sessions(content, active_sid, owner=caller)
+
+
+class SendToSessionTool:
+    """Tool handler that forwards calls to ``send_to_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``send_to_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await send_to_session(content, active_sid, owner=caller)
+
+
+class ManageSessionTool:
+    """Tool handler that forwards calls to ``manage_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``manage_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await manage_session(content, active_sid, owner=caller)
@@ -7,6 +7,7 @@ from src.constants import MAX_OUTPUT_CHARS
 class WebSearchTool:
    async def execute(self, content: str, ctx: dict) -> dict:
        from src.search import comprehensive_web_search
+        progress_cb = ctx.get("progress_cb") if isinstance(ctx, dict) else None
        raw = content.strip()
        query = raw
        time_filter = None
@@ -37,18 +38,39 @@ class WebSearchTool:
            elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
                time_filter = "week"
        loop = asyncio.get_running_loop()
-        text, sources = await asyncio.wait_for(
-            loop.run_in_executor(
-                None,
-                lambda: comprehensive_web_search(
-                    query,
-                    max_pages=max_pages,
-                    time_filter=time_filter,
-                    return_sources=True,
+        if progress_cb:
+            await progress_cb({
+                "elapsed_s": 0,
+                "tail": f"Searching web for: {query[:160]}",
+            })
+        try:
+            text, sources = await asyncio.wait_for(
+                loop.run_in_executor(
+                    None,
+                    lambda: comprehensive_web_search(
+                        query,
+                        max_pages=max_pages,
+                        time_filter=time_filter,
+                        return_sources=True,
+                    ),
                ),
-            ),
-            timeout=30,
-        )
+                timeout=30,
+            )
+        except asyncio.TimeoutError:
+            return {
+                "error": f"web_search timed out after 30s: {query[:200]}",
+                "exit_code": 1,
+            }
+        except Exception as e:
+            return {
+                "error": f"web_search failed: {type(e).__name__}: {str(e) or 'no details'}",
+                "exit_code": 1,
+            }
+        if progress_cb:
+            await progress_cb({
+                "elapsed_s": 30,
+                "tail": "Search completed; preparing sources.",
+            })
        output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
        if sources:
            output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
@@ -57,13 +79,23 @@ class WebSearchTool:
 class WebFetchTool:
    async def execute(self, content: str, ctx: dict) -> dict:
        from src.search.content import fetch_webpage_content
+        from src.constants import WEB_FETCH_HARD_MAX_BYTES
        raw = content.strip()
        url = ""
+        max_bytes = None
        if raw.startswith("{"):
            try:
                parsed = json.loads(raw)
                if isinstance(parsed, dict):
                    url = str(parsed.get("url") or "").strip()
+                    # Download-budget override (#3812): "full": true raises the
+                    # budget to the hard cap; an explicit max_bytes is clamped
+                    # to the hard cap downstream. Default stays the soft cap.
+                    if parsed.get("full") is True:
+                        max_bytes = WEB_FETCH_HARD_MAX_BYTES
+                    mb = parsed.get("max_bytes")
+                    if isinstance(mb, int) and mb > 0:
+                        max_bytes = mb
            except json.JSONDecodeError:
                url = ""
        if not url:
@@ -78,7 +110,7 @@ class WebFetchTool:
        loop = asyncio.get_running_loop()
        try:
            result = await asyncio.wait_for(
-                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
+                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10, max_bytes=max_bytes)),
                timeout=30,
            )
        except asyncio.TimeoutError:
@@ -94,8 +126,28 @@ class WebFetchTool:
                return {"error": f"web_fetch: {url}: {err}", "exit_code": 1}
            return {"error": f"web_fetch: {url}: no readable text content (not HTML, or the page needs JS/login)", "exit_code": 1}

+        # Tell the model when the download budget cut the body short and how
+        # to get the rest, instead of silently presenting a partial page as
+        # the whole thing.
+        size_note = ""
+        if result.get("truncated"):
+            fetched = result.get("fetched_bytes") or 0
+            total = result.get("total_bytes")
+            total_txt = f" of {total:,} bytes" if total else ""
+            size_note = (
+                f"[partial content: download stopped at {fetched:,} bytes{total_txt}. "
+                f'Re-call with {{"url": "{url}", "full": true}} to fetch up to '
+                f"{WEB_FETCH_HARD_MAX_BYTES:,} bytes.]\n\n"
+            )
+
+        # The notice must lead the output so the MAX_OUTPUT_CHARS trim below can
+        # never drop it. The title is untrusted, uncapped page content, so a
+        # giant title ahead of the notice could push it out of range; keep the
+        # notice first and cap the title as a second guard.
+        if len(title) > 300:
+            title = title[:300] + "..."
        header = (f"# {title}\n" if title else "") + f"Source: {url}\n\n"
-        output = header + text
+        output = size_note + header + text
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
        return {"output": output, "exit_code": 0}
@@ -1,8 +1,14 @@
 """
 ai_interaction.py

-AI-to-AI interaction tools: chat_with_model, create_session, list_sessions,
-send_to_session, pipeline.
+AI-to-AI interaction tools: pipeline and manage_memory, plus shared model
+resolution (_resolve_model), the session-manager singleton, and dispatch_ai_tool.
+
+As part of the tool -> registry migration (#3629), chat_with_model, ask_teacher
+and list_models moved to src/agent_tools/model_interaction_tools.py, and
+create_session, list_sessions, send_to_session and manage_session moved to
+src/agent_tools/session_tools.py. Those modules reuse get_session_manager /
+_resolve_model / AI_CHAT_TIMEOUT from here.

 These are agent tools — the LLM writes fenced code blocks and they execute
 through the standard agent_tools.py pipeline.
@@ -159,440 +165,6 @@ def _resolve_model(spec: str, owner: Optional[str] = None) -> Tuple[str, str, Di
 # Tool implementations
 # ---------------------------------------------------------------------------

-async def do_chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Send a message to a specific model and return its response.
-
-    Content format:
-      Line 1: model_name (or model_name@endpoint_name)
-      Line 2+: the message to send
-    """
-    from src.llm_core import llm_call_async
-
-    lines = content.strip().split("\n", 1)
-    if not lines or not lines[0].strip():
-        return {"error": "First line must be the model name"}
-
-    model_spec = lines[0].strip()
-    message = lines[1].strip() if len(lines) > 1 else ""
-    if not message:
-        return {"error": "No message provided (line 2+ is the message)"}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    try:
-        response = await llm_call_async(
-            url, model,
-            [{"role": "user", "content": message}],
-            headers=headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        # Truncate very long responses
-        if len(response) > 10000:
-            response = response[:10000] + "\n... (truncated)"
-        return {"model": model, "response": response}
-    except Exception as e:
-        logger.error(f"chat_with_model failed: {e}")
-        return {"error": f"Failed to get response from {model_spec}: {e}"}
-
-
-_TEACHER_SYSTEM_PROMPT = (
-    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
-    "Provide clear, actionable guidance:\n"
-    "1. Brief analysis of the problem\n"
-    "2. Recommended approach (step by step)\n"
-    "3. Key things to watch out for\n\n"
-    "Be concise and practical. No preamble."
-)
-
-
-async def do_ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Ask a more capable model for help.
-
-    Content format:
-      Line 1: model_name (or 'auto')
-      Line 2+: the problem description
-    """
-    from src.llm_core import llm_call_async
-    from src.settings import get_setting
-
-    lines = content.strip().split("\n", 1)
-    model_spec = lines[0].strip() if lines else "auto"
-    problem = lines[1].strip() if len(lines) > 1 else ""
-
-    if not problem:
-        return {"error": "No problem description provided"}
-
-    if model_spec.lower() in ("auto", ""):
-        model_spec = get_setting("teacher_model", "")
-        if not model_spec:
-            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    try:
-        response = await llm_call_async(
-            url, model,
-            [
-                {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
-                {"role": "user", "content": f"Problem:\n{problem}"},
-            ],
-            headers=headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        if len(response) > 8000:
-            response = response[:8000] + "\n... (truncated)"
-        return {"model": model, "response": response, "teacher": True}
-    except Exception as e:
-        logger.error(f"ask_teacher failed: {e}")
-        return {"error": f"Teacher call failed ({model_spec}): {e}"}
-
-
-async def do_second_opinion(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Get a second opinion from another model, then have the original model
-    evaluate the feedback and produce a unified version.
-
-    Content format:
-      Line 1: model_name (or model_name@endpoint_name)
-      Line 2+ (optional): specific question or focus area
-
-    Flow:
-      1. Pull recent conversation context
-      2. Send to reviewer model → get honest feedback
-      3. Send feedback back to the session's own model → evaluate & unify
-      4. Return both the review and the unified response
-    """
-    from src.llm_core import llm_call_async
-
-    lines = content.strip().split("\n", 1)
-    if not lines or not lines[0].strip():
-        return {"error": "First line must be the model name"}
-
-    model_spec = lines[0].strip()
-    focus = lines[1].strip() if len(lines) > 1 else ""
-
-    try:
-        reviewer_url, reviewer_model, reviewer_headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    # Pull recent conversation context from current session
-    context_text = ""
-    sess = None
-    if session_id and _session_manager:
-        sess = _session_manager.get_session(session_id)
-        if sess:
-            messages = sess.get_context_messages()
-            recent = messages[-15:] if len(messages) > 15 else messages
-            parts = []
-            for m in recent:
-                role = m.get("role", "unknown").upper()
-                text = m.get("content", "")
-                if isinstance(text, list):
-                    text = " ".join(
-                        p.get("text", "") for p in text if isinstance(p, dict)
-                    )
-                if text:
-                    parts.append(f"[{role}]: {text[:2000]}")
-            context_text = "\n\n".join(parts)
-
-    if not context_text:
-        return {"error": "No conversation context found to review"}
-
-    # ── Step 1: Get the reviewer's feedback ──
-    reviewer_system = (
-        "You are giving a second opinion on a conversation between a user and an AI assistant. "
-        "Your job is to be genuinely helpful and honest — not a yes-man, but not a contrarian either.\n\n"
-        "Guidelines:\n"
-        "- If the plan/idea is solid, say so clearly. Don't manufacture problems that aren't there.\n"
-        "- If you spot a real flaw, blind spot, or simpler approach — call it out directly.\n"
-        "- Be practical. Don't over-engineer or over-analyze. Real-world tradeoffs matter.\n"
-        "- If there's a meaningfully better way to do something, suggest it concretely.\n"
-        "- Give credit where it's due — highlight what's working well.\n"
-        "- Keep it concise and actionable. No fluff.\n"
-        "- You're a second pair of eyes, not a professor grading a paper."
-    )
-
-    reviewer_message = f"Here's the conversation so far:\n\n{context_text}"
-    if focus:
-        reviewer_message += f"\n\n---\nSpecifically, I want your take on: {focus}"
-    else:
-        reviewer_message += "\n\n---\nGive me your honest second opinion on what's being discussed."
-
-    try:
-        review = await llm_call_async(
-            reviewer_url, reviewer_model,
-            [
-                {"role": "system", "content": reviewer_system},
-                {"role": "user", "content": reviewer_message},
-            ],
-            headers=reviewer_headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        if len(review) > 8000:
-            review = review[:8000] + "\n... (truncated)"
-    except Exception as e:
-        logger.error(f"second_opinion reviewer call failed: {e}")
-        return {"error": f"Failed to get second opinion from {model_spec}: {e}"}
-
-    # ── Step 2: Send review back to session's own model for evaluation ──
-    unified = ""
-    original_model = "unknown"
-    if sess:
-        original_url = sess.endpoint_url
-        original_model = sess.model
-        original_headers = getattr(sess, "headers", None) or {}
-
-        unify_system = (
-            "Another AI model just reviewed the conversation you've been having with the user. "
-            "Read their feedback carefully, then respond with:\n\n"
-            "1. **What you agree with** — acknowledge valid points honestly.\n"
-            "2. **What you disagree with** — explain why, briefly.\n"
-            "3. **Unified version** — produce an updated/refined version of whatever was being discussed, "
-            "incorporating the feedback you found valid. Don't accept every note blindly — "
-            "use your judgment on what actually improves things vs what's unnecessary.\n\n"
-            "Be concise and practical. The user wants a better result, not a meta-discussion."
-        )
-
-        unify_message = (
-            f"Here's the conversation context:\n\n{context_text}\n\n"
-            f"---\n\n"
-            f"**Review from {reviewer_model}:**\n\n{review}\n\n"
-            f"---\n\n"
-            f"Evaluate this feedback and produce a unified improved version."
-        )
-
-        try:
-            unified = await llm_call_async(
-                original_url, original_model,
-                [
-                    {"role": "system", "content": unify_system},
-                    {"role": "user", "content": unify_message},
-                ],
-                headers=original_headers,
-                timeout=AI_CHAT_TIMEOUT,
-            )
-            if len(unified) > 10000:
-                unified = unified[:10000] + "\n... (truncated)"
-        except Exception as e:
-            logger.error(f"second_opinion unify call failed: {e}")
-            unified = f"(Failed to get unified response: {e})"
-
-    # Build combined result
-    combined = (
-        f"## Second Opinion from {reviewer_model}\n\n{review}"
-        f"\n\n---\n\n"
-        f"## {original_model}'s Response\n\n{unified}"
-    )
-
-    return {
-        "model": reviewer_model,
-        "response": combined,
-        "instruction": "Present these results to the user exactly as they are. Do NOT call second_opinion again. The user can continue the conversation from here.",
-    }
-
-
-async def do_create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Create a new chat session.
-
-    Content format:
-      Line 1: session name
-      Line 2: model_name (or model_name@endpoint_name)
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    lines = content.strip().split("\n")
-    if len(lines) < 2:
-        return {"error": "Need 2 lines: session name, then model spec"}
-
-    name = lines[0].strip()
-    model_spec = lines[1].strip()
-
-    if not name:
-        return {"error": "Session name cannot be empty"}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    sid = str(uuid.uuid4())[:8]
-    try:
-        _session_manager.create_session(
-            session_id=sid,
-            name=name,
-            endpoint_url=url,
-            model=model,
-            rag=False,
-            owner=owner,
-        )
-        # Store headers on session for future calls
-        sess = _session_manager.get_session(sid)
-        if sess and headers:
-            sess.headers = headers
-        try:
-            from src.event_bus import fire_event
-            fire_event("session_created", owner)
-        except Exception:
-            logger.debug("session_created event dispatch failed", exc_info=True)
-
-        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
-    except Exception as e:
-        logger.error(f"create_session failed: {e}")
-        return {"error": f"Failed to create session: {e}"}
-
-
-async def do_list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """List sessions sorted by most-recently-active first.
-
-    Output includes a relative "last active" timestamp per row so the
-    agent can answer "open my last chat" without guessing from titles.
-    The most-recent session is always first in the list.
-
-    Content = optional filter keyword (matches session name).
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    keyword = content.strip().lower() if content.strip() else None
-
-    try:
-        from core.database import SessionLocal, Session as DbSession
-        from datetime import datetime, timezone
-
-        # Pull every session's last_accessed from the DB so we can sort
-        # by recency. In-memory sessions hold name + model + msg_count;
-        # the DB row holds the timestamps.
-        db = SessionLocal()
-        try:
-            db_rows = {r.id: r for r in db.query(DbSession).all()}
-        finally:
-            db.close()
-
-        # SECURITY: scope to the caller's sessions. Passing None returned
-        # every user's sessions, which the agent tool then exposed via the
-        # "list my chats" reply.
-        sessions = _session_manager.get_sessions_for_user(owner)
-        rows = []
-        for sid, sess in sessions.items():
-            if keyword and keyword not in (sess.name or "").lower():
-                continue
-            db_row = db_rows.get(sid)
-            # Prefer last_accessed; fall back to updated_at, then created_at.
-            ts = None
-            if db_row:
-                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
-            rows.append((ts, sid, sess))
-
-        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
-        rows.sort(key=lambda r: r[0] or datetime.min, reverse=True)
-
-        def _rel(ts):
-            if not ts:
-                return 'never'
-            now = datetime.utcnow()
-            try:
-                if ts.tzinfo is not None:
-                    now = datetime.now(timezone.utc)
-                diff = (now - ts).total_seconds()
-            except Exception:
-                return 'unknown'
-            if diff < 60: return 'just now'
-            if diff < 3600: return f'{int(diff / 60)}m ago'
-            if diff < 86400: return f'{int(diff / 3600)}h ago'
-            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
-            return ts.strftime('%Y-%m-%d')
-
-        lines = []
-        for i, (ts, sid, sess) in enumerate(rows):
-            if i >= 50:
-                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
-                break
-            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
-            msg_count = getattr(sess, "message_count", 0) or 0
-            model = getattr(sess, "model", "unknown")
-            marker = " ← most recent" if i == 0 else ""
-            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
-
-        if not lines:
-            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
-
-        return {
-            "results": (
-                f"Found {len(rows)} session(s), sorted most-recent first:\n"
-                + "\n".join(lines)
-                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
-            )
-        }
-    except Exception as e:
-        logger.error(f"list_sessions failed: {e}")
-        return {"error": str(e)}
-
-
-async def do_send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Send a message to an existing session and get a response.
-
-    Content format:
-      Line 1: session_id
-      Line 2+: message
-    """
-    from src.llm_core import llm_call_async
-    from core.models import ChatMessage
-
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    lines = content.strip().split("\n", 1)
-    if len(lines) < 2:
-        return {"error": "Need 2 lines: session_id, then message"}
-
-    target_sid = lines[0].strip()
-    message = lines[1].strip()
-
-    sess = _session_manager.get_session(target_sid)
-    if not sess:
-        return {"error": f"Session '{target_sid}' not found"}
-
-    # Owner-scope: reject access to another user's session
-    if owner and getattr(sess, "owner", None) and sess.owner != owner:
-        return {"error": f"Session '{target_sid}' not found"}
-
-    if not message:
-        return {"error": "No message provided"}
-
-    try:
-        # Build context from session history
-        context = sess.get_context_messages()
-        context.append({"role": "user", "content": message})
-
-        response = await llm_call_async(
-            sess.endpoint_url, sess.model, context,
-            headers=sess.headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-
-        # Save both messages to session
-        sess.add_message(ChatMessage("user", message))
-        sess.add_message(ChatMessage("assistant", response))
-
-        # Truncate for tool output
-        if len(response) > 10000:
-            response = response[:10000] + "\n... (truncated)"
-
-        return {
-            "session_id": target_sid,
-            "session_name": sess.name,
-            "response": response,
-        }
-    except Exception as e:
-        logger.error(f"send_to_session failed: {e}")
-        return {"error": f"Failed to send to session: {e}"}


 async def stream_ai_tool(tool: str, content: str, session_id: Optional[str] = None, owner: Optional[str] = None):
@@ -715,229 +287,6 @@ async def do_pipeline(content: str, session_id: Optional[str] = None, owner: Opt
 # Session management tool
 # ---------------------------------------------------------------------------

-async def do_manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Manage sessions: rename, archive, delete, important, truncate, fork.
-
-    Content format:
-      Line 1: action (rename|archive|unarchive|delete|important|unimportant|truncate|fork)
-      Line 2: target session_id (or "current" to use the active session)
-      Line 3+: action-specific params (e.g. new name for rename, keep_count for truncate)
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    from src.database import SessionLocal, Session as DbSession
-
-    # Accept BOTH the structured JSON args the tool schema advertises
-    # ({action, session_id, value}) AND the legacy line-based format
-    # (line1=action, line2=session_id, line3=value). Native function-calling
-    # models send JSON; fenced-block callers send lines. Previously only the
-    # line format was parsed, so a model that followed the schema (JSON) got
-    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
-    _raw = (content or "").strip()
-    action = ""
-    target_sid = ""
-    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
-    _list_filter = ""
-    _parsed = None
-    if _raw.startswith("{"):
-        try:
-            _parsed = json.loads(_raw)
-        except Exception:
-            _parsed = None
-    if isinstance(_parsed, dict):
-        action = str(_parsed.get("action") or "").strip().lower()
-        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
-        _v = _parsed.get("value")
-        if _v is None:
-            _v = (_parsed.get("name") or _parsed.get("new_name")
-                  or _parsed.get("title") or _parsed.get("keep_count"))
-        value = None if _v is None else str(_v).strip()
-        _list_filter = str(_parsed.get("filter") or "").strip()
-    else:
-        lines = _raw.split("\n")
-        if not lines or not lines[0].strip():
-            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
-        action = lines[0].strip().lower()
-        target_sid = lines[1].strip() if len(lines) >= 2 else ""
-        value = lines[2].strip() if len(lines) >= 3 else None
-        _list_filter = "\n".join(lines[1:]).strip()
-
-    if not action:
-        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
-
-    # `list` alias — dispatch to do_list_sessions so the agent's natural
-    # first guess (every other manage_* tool has a `list` action) works.
-    if action == "list":
-        return await do_list_sessions(_list_filter, session_id, owner=owner)
-
-    if not target_sid:
-        return {"error": "Need a session_id (or 'current' for the active chat)"}
-
-    # Allow "current" to refer to the active session
-    if target_sid.lower() == "current" and session_id:
-        target_sid = session_id
-
-    # `switch` / `open` / `select` / `view` — the agent reaches for
-    # these when the user asks to "open" or "switch to" a session.
-    # There's no server-side way to make the browser navigate, so we
-    # just return a clickable anchor link the user can click. The
-    # frontend's chat-history click delegate routes `#session-<id>`
-    # to selectSession(). The agent's reply naturally embeds this
-    # result so the user sees a single clickable line.
-    def _session_query(db):
-        query = db.query(DbSession).filter(DbSession.id == target_sid)
-        if owner is not None:
-            query = query.filter(DbSession.owner == owner)
-        return query
-
-    if action in ("switch", "open", "select", "view"):
-        db = SessionLocal()
-        try:
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            name = db_sess.name or target_sid
-        finally:
-            db.close()
-        return {
-            "action": action,
-            "session_id": target_sid,
-            "name": name,
-            "results": f"[{name}](#session-{target_sid}) — click to open.",
-        }
-
-    db = SessionLocal()
-    try:
-        if action == "rename":
-            if not value:
-                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
-            new_name = value
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.name = new_name
-            db.commit()
-            _session_manager.update_session_name(target_sid, new_name)
-            return {"action": "rename", "session_id": target_sid, "name": new_name,
-                    "results": f"Session renamed to '{new_name}'"}
-
-        elif action == "archive":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.archived = True
-            db.commit()
-            return {"action": "archive", "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' archived"}
-
-        elif action == "unarchive":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.archived = False
-            db.commit()
-            return {"action": "unarchive", "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' unarchived"}
-
-        elif action == "delete":
-            if target_sid == session_id:
-                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
-            if db_sess and db_sess.is_important:
-                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
-            try:
-                ok = _session_manager.delete_session(target_sid)
-                if not ok:
-                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
-                return {"action": "delete", "session_id": target_sid,
-                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
-            except Exception as e:
-                return {"error": f"Failed to delete session: {e}"}
-
-        elif action in ("important", "unimportant"):
-            is_important = action == "important"
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            # Prevent AI from unstarring sessions — only the user can do that manually
-            if not is_important and db_sess.is_important:
-                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
-            db_sess.is_important = is_important
-            db.commit()
-            status = "marked as important" if is_important else "unmarked as important"
-            return {"action": action, "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' {status}"}
-
-        elif action == "truncate":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            keep_count = 10
-            if value:
-                try:
-                    keep_count = int(value)
-                except ValueError:
-                    pass
-            success = _session_manager.truncate_messages(target_sid, keep_count)
-            if success:
-                return {"action": "truncate", "session_id": target_sid,
-                        "results": f"Session truncated to last {keep_count} messages"}
-            return {"error": f"Failed to truncate session '{target_sid}'"}
-
-        elif action == "fork":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            keep_count = 0  # 0 = all messages
-            if value:
-                try:
-                    keep_count = int(value)
-                except ValueError:
-                    pass
-
-            source = _session_manager.get_session(target_sid)
-            if not source:
-                return {"error": f"Session '{target_sid}' not found"}
-
-            new_sid = str(uuid.uuid4())[:8]
-            _session_manager.create_session(
-                session_id=new_sid,
-                name=f"Fork: {source.name}",
-                endpoint_url=source.endpoint_url,
-                model=source.model,
-                rag=False,
-                owner=owner,
-            )
-            # Copy messages
-            history = source.get_context_messages()
-            if keep_count > 0:
-                history = history[:keep_count]
-            from core.models import ChatMessage as InMemoryMsg
-            new_sess = _session_manager.get_session(new_sid)
-            for msg in history:
-                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
-            try:
-                from src.event_bus import fire_event
-                fire_event("session_created", owner)
-            except Exception:
-                logger.debug("session_created event dispatch failed", exc_info=True)
-
-            return {"action": "fork", "session_id": new_sid,
-                    "source_session": target_sid, "messages_copied": len(history),
-                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
-
-        else:
-            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
-    except Exception as e:
-        logger.error(f"manage_session failed: {e}")
-        return {"error": str(e)}
-    finally:
-        db.close()
-
-
 # ---------------------------------------------------------------------------
 # Memory management tool
 # ---------------------------------------------------------------------------
@@ -972,16 +321,15 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
            memories = [m for m in memories if m.get("category", "").lower() == category_filter]
        if not memories:
            return {"results": "No memories found" + (f" in category '{category_filter}'" if category_filter else "") + "."}
+
        result_lines = [f"Found {len(memories)} memory entries:\n"]
-        for m in memories[:100]:
+        for m in memories:
            cat = m.get("category", "fact")
            mid = m.get("id", "?")[:8]
            text = m.get("text", "")
            if len(text) > 150:
                text = text[:150] + "..."
            result_lines.append(f"- [{cat}] `{mid}` — {text}")
-        if len(memories) > 100:
-            result_lines.append(f"... and {len(memories) - 100} more")
        return {"results": "\n".join(result_lines)}

    elif action == "add":
@@ -1105,83 +453,6 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
        return {"error": f"Unknown action '{action}'. Use: list, add, edit, delete, search"}


-# ---------------------------------------------------------------------------
-# List models tool
-# ---------------------------------------------------------------------------
-
-async def do_list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """List all available models across configured endpoints.
-
-    Content = optional filter keyword.
-    """
-    import httpx
-    from src.database import SessionLocal, ModelEndpoint
-    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
-    from src.auth_helpers import owner_filter
-
-    keyword = content.strip().lower() if content.strip() else None
-
-    db = SessionLocal()
-    try:
-        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
-        if owner:
-            query = owner_filter(query, ModelEndpoint, owner)
-        endpoints = query.all()
-        if not endpoints:
-            return {"results": "No enabled model endpoints configured."}
-
-        result_lines = []
-        total_models = 0
-
-        for ep in endpoints:
-            try:
-                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
-            except Exception:
-                continue
-            provider = _detect_provider(base)
-            headers = build_headers(api_key, base)
-
-            model_ids = []
-            if provider == "anthropic":
-                model_ids = list(ANTHROPIC_MODELS)
-            else:
-                try:
-                    models_url = build_models_url(base)
-                    if models_url:
-                        r = httpx.get(models_url, headers=headers, timeout=5)
-                        r.raise_for_status()
-                        data = r.json()
-                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
-                        if not model_ids:
-                            model_ids = [
-                                m.get("name") or m.get("model")
-                                for m in (data.get("models") or [])
-                                if m.get("name") or m.get("model")
-                            ]
-                    else:
-                        model_ids = json.loads(ep.cached_models or "[]")
-                except Exception:
-                    model_ids = ["(endpoint offline)"]
-
-            if keyword:
-                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
-
-            if model_ids:
-                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
-                for mid in model_ids:
-                    result_lines.append(f"  - `{mid}`")
-                    total_models += 1
-
-        if not result_lines:
-            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
-
-        header = f"Available models ({total_models} total):"
-        return {"results": header + "\n".join(result_lines)}
-    except Exception as e:
-        logger.error(f"list_models failed: {e}")
-        return {"error": str(e)}
-    finally:
-        db.close()


 # ---------------------------------------------------------------------------
@@ -1293,7 +564,7 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
      set_theme <preset>      — Apply a built-in theme preset (dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute)
      create_theme <name> <bg> <fg> <panel> <border> <accent> [key=val ...] — Create custom theme. Optional key=val: advanced color overrides AND background effects: bgPattern=<none|dots|synapse|rain|constellations|perlin-flow|petals|sparkles|embers>, bgEffectColor=#RRGGBB, bgEffectIntensity=<num>, bgEffectSize=<num>, frosted=true|false
      open_panel <name>       — Open a panel (documents, gallery, email, sessions, notes, memories, skills, settings, cookbook)
-      open_email_reply <uid> [folder] [reply|reply-all|ai-reply] — Open a reply draft document for an email; does not send
+      open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text] — Open a reply draft document for an email; does not send. Body text is REQUIRED for reply and reply-all (a call without it returns an error asking you to re-call with the body); only ai-reply may omit it. Inline body text is only read after BOTH the folder and mode tokens; otherwise put the body on the following lines.
      get_toggles             — Return current toggle states (server-side knowledge)
    """
    lines = content.strip().split("\n")
@@ -1537,21 +808,54 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
        }

    elif action == "open_email_reply":
-        reply_parts = lines[0].strip().split()
-        uid = reply_parts[1].strip() if len(reply_parts) > 1 else ""
-        folder = reply_parts[2].strip() if len(reply_parts) > 2 else "INBOX"
-        mode = reply_parts[3].strip().lower() if len(reply_parts) > 3 else "reply"
+        # Two forms supported:
+        #   open_email_reply <uid> <folder> ai-reply   (no body: only ai-reply may omit it)
+        #   open_email_reply <uid> [folder] [reply|reply-all|ai-reply]
+        #     <body text on subsequent lines or after the mode token>
+        # The body text (if any) gets pre-filled into the reply draft so the
+        # agent can compose-and-open in one tool call instead of opening an
+        # empty draft and leaving the user to wonder what happened.
+        first_line = lines[0].strip()
+        parts = first_line.split(maxsplit=4)
+        uid = parts[1].strip() if len(parts) > 1 else ""
+        folder = parts[2].strip() if len(parts) > 2 else "INBOX"
+        mode = parts[3].strip().lower() if len(parts) > 3 else "reply"
+        # Body: everything on the first line after the mode token, plus any
+        # subsequent lines. Allows multi-line bodies.
+        inline_body = parts[4] if len(parts) > 4 else ""
+        rest_lines = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
+        body = (inline_body + ("\n" + rest_lines if rest_lines else "")).strip()
        if not uid:
-            return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply]"}
+            return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text]"}
        if mode not in ("reply", "reply-all", "ai-reply"):
            mode = "reply"
-        return {
+        # Body is REQUIRED for the agent path. Opening an empty draft is what
+        # users do by clicking the Reply button — they don't ask the agent
+        # for that. Every agent invocation of open_email_reply MUST include
+        # the body. Reject empty so the agent retries with the content the
+        # user asked for. Exception: ai-reply mode triggers the existing
+        # AI-Reply path on the frontend which generates its own body.
+        if not body and mode != "ai-reply":
+            return {
+                "error": (
+                    "open_email_reply called without body. The agent path REQUIRES a body — "
+                    "opening an empty draft is the wrong response when the user asked you to write. "
+                    "Re-call with the reply text included: "
+                    f"`open_email_reply {uid} {folder or 'INBOX'} {mode} <your reply text here>`. "
+                    "Compose the reply now based on the open email's content and the user's request, "
+                    "then call this tool again with the body. Do NOT call create_document instead."
+                ),
+            }
+        result = {
            "ui_event": "open_email_reply",
            "uid": uid,
            "folder": folder or "INBOX",
            "mode": mode,
-            "results": f"Opening reply draft for email UID {uid}",
+            "results": f"Opening reply draft for email UID {uid}" + (" with pre-filled body" if body else ""),
        }
+        if body:
+            result["body"] = body
+        return result

    elif action == "get_toggles":
        return {
@@ -1581,7 +885,9 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
    """
    import base64
    import httpx
+    import os
    from pathlib import Path
+    from src.url_safety import check_outbound_url

    lines = content.strip().split("\n")
    prompt = lines[0].strip() if lines else ""
@@ -1747,8 +1053,15 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne

            elif img.get("url"):
                # Download external URL and save locally (DALL-E returns temp URLs)
+                result_url = img["url"]
+                ok, reason = check_outbound_url(
+                    result_url,
+                    block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+                )
+                if not ok:
+                    return {"error": f"Image API returned unsafe image URL: {reason}"}
                try:
-                    dl_resp = httpx.get(img["url"], timeout=60)
+                    dl_resp = httpx.get(result_url, timeout=60)
                    if dl_resp.status_code == 200:
                        img_dir = Path(GENERATED_IMAGES_DIR)
                        img_dir.mkdir(parents=True, exist_ok=True)
@@ -1758,10 +1071,10 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
                        image_url = f"/api/generated-image/{filename}"
                        image_id = _save_to_gallery(filename)
                    else:
-                        image_url = img["url"]  # fallback to external URL
+                        image_url = result_url  # fallback to external URL
                except Exception as _dl_e:
                    logger.warning(f"Failed to download DALL-E image: {_dl_e}")
-                    image_url = img["url"]  # fallback to external URL
+                    image_url = result_url  # fallback to external URL
            else:
                return {"error": "Image API returned unexpected format (no b64_json or url)"}

@@ -1790,55 +1103,20 @@ async def dispatch_ai_tool(
 ) -> Tuple[str, Dict]:
    """Dispatch an AI interaction tool. Returns (description, result_dict)."""

-    if tool == "chat_with_model":
-        model_spec = content.split("\n")[0].strip()[:60]
-        desc = f"chat_with_model: {model_spec}"
-        result = await do_chat_with_model(content, session_id, owner=owner)
-
-    elif tool == "create_session":
-        name = content.split("\n")[0].strip()[:60]
-        desc = f"create_session: {name}"
-        result = await do_create_session(content, session_id, owner=owner)
-
-    elif tool == "list_sessions":
-        keyword = content.strip()[:40]
-        desc = f"list_sessions{': ' + keyword if keyword else ''}"
-        result = await do_list_sessions(content, session_id, owner=owner)
-
-    elif tool == "send_to_session":
-        sid = content.split("\n")[0].strip()[:20]
-        desc = f"send_to_session: {sid}"
-        result = await do_send_to_session(content, session_id, owner=owner)
-
-    elif tool == "pipeline":
+    if tool == "pipeline":
        desc = "pipeline: running steps"
        result = await do_pipeline(content, session_id, owner=owner)

-    elif tool == "manage_session":
-        action = content.split("\n")[0].strip()[:40]
-        desc = f"manage_session: {action}"
-        result = await do_manage_session(content, session_id, owner=owner)
-
    elif tool == "manage_memory":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_memory: {action}"
        result = await do_manage_memory(content, session_id, owner=owner)

-    elif tool == "list_models":
-        keyword = content.strip()[:40]
-        desc = f"list_models{': ' + keyword if keyword else ''}"
-        result = await do_list_models(content, session_id, owner=owner)
-
    elif tool == "ui_control":
        action = content.split("\n")[0].strip()[:60]
        desc = f"ui_control: {action}"
        result = await do_ui_control(content, session_id, owner=owner)

-    elif tool == "ask_teacher":
-        problem = content.split("\n", 1)[-1].strip()[:60]
-        desc = f"ask_teacher: {problem}"
-        result = await do_ask_teacher(content, session_id, owner=owner)
-
    else:
        desc = f"unknown ai tool: {tool}"
        result = {"error": f"Unknown AI interaction tool: {tool}"}
@@ -81,11 +81,26 @@ class APIKeyManager:
        keys stay encrypted. Loading via load() first would decrypt them and
        write them back as plaintext, which then fails to decrypt on the next
        load() and silently drops those providers.
+
+        Uses atomic write (temp file + os.replace) so a crash, disk-full, or
+        mid-write error never truncates the existing keys file.
        """
        keys = self._load_raw()
        keys[provider] = self.encrypt_api_key(api_key)
-        with open(self.api_keys_file, 'w', encoding="utf-8") as f:
-            json.dump(keys, f)
+        tmp_file = self.api_keys_file + ".tmp"
+        try:
+            with open(tmp_file, 'w', encoding="utf-8") as f:
+                json.dump(keys, f)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp_file, self.api_keys_file)
+        except OSError:
+            # Clean up temp file on failure; re-raise so callers see the error
+            try:
+                os.remove(tmp_file)
+            except OSError:
+                pass
+            raise

    def load(self) -> Dict[str, str]:
        """Load and decrypt API keys"""
@@ -1,6 +1,13 @@
 # src/app_helpers.py
-import os
 import base64
+import logging
+import os
+
+from fastapi import HTTPException
+from fastapi.responses import HTMLResponse
+from starlette.requests import Request
+
+logger = logging.getLogger(__name__)

 def read_if_exists(path: str) -> str:
    """Read file if it exists, return empty string otherwise."""
@@ -20,6 +27,28 @@ def abs_join(base_dir: str, rel: str) -> str:
    """Join paths and return absolute path."""
    return os.path.abspath(os.path.join(base_dir, rel))

+def serve_html_with_nonce(request: Request, file_path: str) -> HTMLResponse:
+    """Read an app-bundled HTML page and replace every {{CSP_NONCE}} placeholder with the request's CSP nonce.
+
+    Callers pass fixed, server-owned template paths (index/login/backgrounds),
+    never a client-supplied path. So any read failure here — a missing file
+    (broken deployment) or a permission/IO error — is a server fault, not a
+    client "not found": map all of them to a logged 500 so a missing core
+    template surfaces in 5xx alerting instead of hiding behind a 404. If a
+    future caller serves a client-influenced path where 404 is correct, branch
+    that at the call site rather than defaulting this shared helper to 404.
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            html = f.read()
+    except OSError:
+        logger.exception("Failed to read page %s", file_path)
+        raise HTTPException(500, "Internal server error")
+    nonce = getattr(request.state, "csp_nonce", "")
+    html = html.replace("{{CSP_NONCE}}", nonce)
+    return HTMLResponse(html)
+
+
 def inside_base_dir(base_dir: str, path: str) -> bool:
    """Check if path is inside base directory."""
    if not isinstance(base_dir, str) or not isinstance(path, str):
@@ -263,10 +263,32 @@ def list_for_session(session_id: str) -> List[Dict[str, Any]]:
    return [r for r in refresh().values() if r.get("session_id") == session_id]


+def kill(job_id: str) -> Optional[Dict[str, Any]]:
+    """Terminate a running job's process tree and mark it killed. Returns the
+    updated record, or None if the id is unknown. Idempotent: a job that already
+    finished is returned unchanged. Sets followed_up so the monitor does not also
+    fire an auto-continue for a job the agent deliberately stopped."""
+    jobs = _load()
+    rec = jobs.get(job_id)
+    if rec is None:
+        return None
+    if rec.get("status") == "running":
+        _kill(rec.get("pid"))
+        rec["status"] = "failed"
+        rec["exit_code"] = -1
+        rec["ended_at"] = time.time()
+        rec["killed"] = True
+        rec["followed_up"] = True
+        _save(jobs)
+    return rec
+
+
 def result_text(rec: Dict[str, Any]) -> str:
    """Human/agent-readable summary of a finished job, for the follow-up."""
    out = _read_output(rec)
-    if rec.get("timed_out"):
+    if rec.get("killed"):
+        head = "Background job was killed."
+    elif rec.get("timed_out"):
        head = f"Background job timed out after {rec.get('max_runtime_s')}s."
    elif rec.get("died"):
        head = "Background job process died unexpectedly (no exit code)."
@@ -76,8 +76,7 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
        import json
        import re
        from src.constants import DATA_DIR
-        from src.endpoint_resolver import resolve_endpoint
-        from src.llm_core import llm_call_async
+        from src.llm_core import llm_call_async_with_fallback
        from src.memory import MemoryManager

        manager = MemoryManager(DATA_DIR)
@@ -116,10 +115,9 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
            if len(group_memories) < 2:
                return False

-            url, model, headers = resolve_endpoint("utility", owner=group_owner or None)
-            if not url or not model:
-                url, model, headers = resolve_endpoint("default", owner=group_owner or None)
-            if not url or not model:
+            from src.task_endpoint import resolve_task_candidates
+            candidates = resolve_task_candidates(owner=group_owner or None)
+            if not candidates:
                return False

            try:
@@ -147,13 +145,11 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                    "\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n"
                    f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}"
                )
-                raw = await llm_call_async(
-                    url=url,
-                    model=model,
+                raw = await llm_call_async_with_fallback(
+                    candidates,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.0,
                    max_tokens=4096,
-                    headers=headers,
                    timeout=120,
                )
                from src.text_helpers import strip_think
@@ -604,8 +600,7 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
    try:
        from datetime import timedelta
        from core.database import SessionLocal, CalendarEvent
-        from src.endpoint_resolver import resolve_endpoint
-        from src.llm_core import llm_call_async
+        from src.llm_core import llm_call_async_with_fallback
        import re as _re, json as _json

        db = SessionLocal()
@@ -620,10 +615,9 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
            if not events:
                return "No upcoming events to classify", True

-            llm_url, llm_model, llm_headers = resolve_endpoint("utility", owner=owner)
-            if not llm_url:
-                llm_url, llm_model, llm_headers = resolve_endpoint("default", owner=owner)
-            llm_available = bool(llm_url and llm_model)
+            from src.task_endpoint import resolve_task_candidates
+            llm_candidates = resolve_task_candidates(owner=owner)
+            llm_available = bool(llm_candidates)

            # Pull user memories so the LLM has personal context (relationships,
            # job, hobbies). Helps it know e.g. "<name> is your spouse" so their
@@ -699,11 +693,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
                    f"EVENTS: {_json.dumps(items)}"
                )
                try:
-                    raw = await llm_call_async(
-                        url=llm_url, model=llm_model,
+                    raw = await llm_call_async_with_fallback(
+                        llm_candidates,
                        messages=[{"role": "user", "content": prompt}],
                        temperature=0.1, max_tokens=16384,
-                        headers=llm_headers, timeout=180,
+                        timeout=180,
                    )
                    from src.text_helpers import strip_think as _st
                    raw = _st(raw or "", prose=False, prompt_echo=False)
@@ -810,8 +804,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        import asyncio as _aio
        from datetime import datetime as _dt, timedelta as _td
        from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
-        from src.endpoint_resolver import resolve_endpoint
-        from src.llm_core import llm_call_async
+        from src.llm_core import llm_call_async_with_fallback

        # 1. Pull recent UIDs + From headers cheaply (header-only fetch).
        def _pull_headers():
@@ -891,11 +884,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        if not eligible:
            return "All sender sigs already cached (or no eligible senders)", True

-        url, model, headers = resolve_endpoint("utility", owner=owner)
-        if not url or not model:
-            url, model, headers = resolve_endpoint("default", owner=owner)
-        if not url or not model:
+        from src.task_endpoint import resolve_task_candidates
+        candidates = resolve_task_candidates(owner=owner)
+        if not candidates:
            return "No LLM endpoint available", False
+        model = candidates[0][1]

        analyzed = 0
        no_sig = 0
@@ -949,11 +942,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
            )

            try:
-                raw = await llm_call_async(
-                    url=url, model=model,
+                raw = await llm_call_async_with_fallback(
+                    candidates,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.0, max_tokens=600,
-                    headers=headers, timeout=60,
+                    timeout=60,
                )
                from src.text_helpers import strip_think as _st
                sig = _st(raw or "", prose=False, prompt_echo=False).strip()
@@ -1137,7 +1130,6 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
        from services.memory.skills import SkillsManager
        from src.constants import DATA_DIR
        from routes.skills_routes import _run_skill_test_once, _skill_test_task
-        from src.endpoint_resolver import resolve_endpoint

        # #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise
        # `sm.load(owner=None)` returns every user's skills and we'd cross-
@@ -1152,27 +1144,40 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
        if not names:
            raise TaskNoop("no skills to test")

-        url, model, headers = resolve_endpoint("default", owner=owner)
-        if not url or not model:
+        from src.task_endpoint import resolve_task_candidates
+        candidates = resolve_task_candidates(owner=owner)
+        if not candidates:
            return "No Default/Utility model configured — set one in Settings.", False

        # #2 NO SILENT MODEL SWAP: if the configured model isn't served by the
        # endpoint, try a basename match — but fail loudly instead of grabbing
        # `avail[0]` which could be an embedding-only model and produce 36
        # garbage transcripts → 36 'unknown' verdicts with no hint why.
+        url, model, headers = candidates[0]
        try:
            from src.llm_core import list_model_ids
-            avail = list_model_ids(url, headers=headers)
-            if avail and model not in avail:
-                import os as _os
-                base = _os.path.basename((model or "").rstrip("/"))
-                m = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
-                if m:
-                    model = m
-                else:
-                    return (f"Default model '{model}' not served by endpoint {url}. "
-                            f"Available: {', '.join(avail[:8])}{'…' if len(avail) > 8 else ''}. "
-                            "Set a valid Default model in Settings."), False
+            import os as _os
+
+            selected = None
+            mismatch_notes = []
+            for cand_url, cand_model, cand_headers in candidates:
+                avail = list_model_ids(cand_url, headers=cand_headers)
+                if not avail or cand_model in avail:
+                    selected = (cand_url, cand_model, cand_headers)
+                    break
+                base = _os.path.basename((cand_model or "").rstrip("/"))
+                matched = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
+                if matched:
+                    selected = (cand_url, matched, cand_headers)
+                    break
+                mismatch_notes.append(
+                    f"{cand_model} not served by {cand_url}; available: "
+                    f"{', '.join(avail[:8])}{'...' if len(avail) > 8 else ''}"
+                )
+            if selected:
+                url, model, headers = selected
+            elif mismatch_notes:
+                return "No configured task fallback model is served. " + " | ".join(mismatch_notes[:3]), False
        except Exception as _e:
            logger.warning(f"test_skills model resolve check failed (continuing): {_e}")

@@ -1483,7 +1488,6 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
        from pathlib import Path as _P
        from core.database import SessionLocal as _SL, EmailAccount as _EA
        from routes.email_helpers import _imap_connect, _decode_header
-        from src.endpoint_resolver import resolve_endpoint, resolve_utility_fallback_candidates
        from src.llm_core import llm_call_async_with_fallback

        # Per-owner state file so multi-user runs don't clobber each other's
@@ -1505,12 +1509,10 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:

        # ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall
        # through to default chat as a last resort).
-        url, model, headers = resolve_endpoint("utility", owner=owner)
-        if not url or not model:
-            url, model, headers = resolve_endpoint("default", owner=owner)
-        if not url or not model:
+        from src.task_endpoint import resolve_task_candidates
+        candidates = resolve_task_candidates(owner=owner)
+        if not candidates:
            return "No LLM endpoint available", False
-        candidates = [(url, model, headers)] + resolve_utility_fallback_candidates(owner=owner)

        # ── 2. Enumerate enabled accounts. Match this task's owner AND fall
        # back to the legacy "unowned account whose imap_user / from_address
@@ -14,6 +14,7 @@ import subprocess
 import sys

 from core.platform_compat import IS_WINDOWS, which_tool
+from src.runtime_paths import get_app_root

 logger = logging.getLogger(__name__)

@@ -81,7 +82,7 @@ _BUILTIN_NPX_SERVERS = {
        "name": "Built-in: Browser",
        "command": "npx",
        "args": ["-y", "@playwright/mcp@latest", "--headless", "--caps", "vision"],
-    },
+    }
 }

 # Global flag to disable MCP if there are compatibility issues
@@ -94,7 +95,7 @@ async def register_builtin_servers(mcp_manager):
        logger.info("Built-in MCP servers disabled via ODYSSEUS_DISABLE_MCP")
        return

-    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    base_dir = get_app_root()
    python = sys.executable

    async def _connect_python_server(server_id: str, script_path: str, name: str):
@@ -5,6 +5,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import Field, field_validator

 from src.constants import DATA_DIR as _DATA_DIR_CONST
+from src.runtime_paths import get_app_root

 # Cross-platform OS flag, exposed here so callers can `from src.config import
 # IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
@@ -19,7 +20,7 @@ IS_WINDOWS = os.name == "nt"
 class DataConfig(BaseSettings):
    """Configuration for data storage and file handling."""
    # Base directory
-    base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
+    base_dir: Path = Field(default=Path(get_app_root()), description="Base directory for the application")
    
    # Data paths
    data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
@@ -138,7 +139,7 @@ class AppConfig(BaseSettings):
        if isinstance(v, dict) and "base_dir" in v:
            base_dir = v["base_dir"]
        else:
-            base_dir = Path(__file__).parent.parent
+            base_dir = Path(get_app_root())
        
        # Convert string paths to Path objects relative to base_dir
        data_dir = Path(_DATA_DIR_CONST)
@@ -2,12 +2,14 @@
 """Application-wide constants and configuration values."""
 import os

-APP_VERSION = "1.0.0"
+from src.runtime_paths import get_app_root, get_default_data_dir
+
+APP_VERSION = "1.0.1"

 # Base paths
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
+BASE_DIR = os.path.join(get_app_root(), "")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
-DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", os.path.join(BASE_DIR, "data"))
+DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", get_default_data_dir())

 # Data file paths
 # Single source of truth: every persisted file/dir lives under DATA_DIR, which
@@ -55,7 +57,13 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")

 # Paths with an intentional dedicated env override, defaulting under DATA_DIR.
 MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
-FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache"))
+# `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
+# the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
+# which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
+# only returns the default when the var is ABSENT, so the empty string would win →
+# os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
+# init and all vector features (RAG, semantic memory, tool index) silently degrade.
+FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")

 # Agent tool output limits (single source of truth — imported by tool_execution.py,
 # tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -63,11 +71,26 @@ MAX_OUTPUT_CHARS = 10_000       # cap for bash/python/web_search/web_fetch outpu
 MAX_READ_CHARS = 20_000         # cap for read_file / document preview
 MAX_DIFF_LINES = 400            # cap for edit_file unified-diff display

+# web_fetch response-size policy (#3812). MAX_OUTPUT_CHARS above only trims
+# what the agent SEES; these caps bound what the server downloads, parses,
+# and writes to the content cache. The soft cap is the default download
+# budget; the agent can raise it per call (full/max_bytes) but never past
+# the hard cap, so a model can't decide to pull a multi-GB file.
+WEB_FETCH_SOFT_MAX_BYTES = 2_000_000    # default download budget (2 MB)
+WEB_FETCH_HARD_MAX_BYTES = 20_000_000   # absolute ceiling, even with override (20 MB)
+
 # API Configuration
 MAX_CONTEXT_MESSAGES = 90
 REQUEST_TIMEOUT = 20
 OPENAI_COMPAT_PATH = "/v1/chat/completions"

+# Outbound UA for web_fetch / web_search scraping; common desktop UA so pages serve normal HTML.
+WEB_FETCH_USER_AGENT = os.environ.get(
+    "WEB_FETCH_USER_AGENT",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
+)
+
 # Environment variables with defaults
 DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
 LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
@@ -79,6 +102,9 @@ SEARXNG_INSTANCE = os.getenv("SEARXNG_INSTANCE", "http://localhost:8080")
 CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
 CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))

+# Auth policy
+PASSWORD_MIN_LENGTH = 8
+
 # Default parameters
 DEFAULT_TEMPERATURE = 1.0
 DEFAULT_MAX_TOKENS = 0
@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
    protected_tokens = estimate_tokens(protected_msgs)
    budget -= protected_tokens

-    # Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
-    essential_system = system_msgs[:1] if system_msgs else []
-    extra_system = system_msgs[1:]
+    # Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
+    # Exception: a research-spinoff primer (the seeded report that grounds a
+    # "Discuss" chat) must never be dropped — it is the conversation's whole
+    # knowledge base. Treat any system message carrying research_spinoff_from
+    # metadata as essential alongside the leading system prompt.
+    def _is_research_primer(m):
+        return bool((m.get("metadata") or {}).get("research_spinoff_from"))
+    _primers = [m for m in system_msgs if _is_research_primer(m)]
+    _non_primer = [m for m in system_msgs if not _is_research_primer(m)]
+    essential_system = (_non_primer[:1] if _non_primer else []) + _primers
+    extra_system = _non_primer[1:]

    # Try dropping extra system messages one by one (from the end)
    trimmed = essential_system + convo_msgs
@@ -161,11 +161,13 @@ async def _tick() -> None:
    # Re-read state once before writing so we capture any updates from
    # concurrent UI syncs.
    stopped_any = False
+    successfully_stopped_sids = set()
    for sid, host, port in to_stop:
        ok = await _stop_serve(sid, host, port)
        logger.info(f"cookbook_serve_lifecycle: stop {sid} (host={host or 'local'}): {'ok' if ok else 'failed'}")
        if ok:
            stopped_any = True
+            successfully_stopped_sids.add(sid)
            # Drop the auto-registered endpoint so the model picker and
            # the chat router don't keep pointing at a dead server.
            for t in tasks:
@@ -188,12 +190,11 @@ async def _tick() -> None:
            except Exception:
                fresh = state
                fresh_tasks = tasks
-            stopped_sids = {sid for sid, _, _ in to_stop}
            for ft in fresh_tasks:
                if not isinstance(ft, dict):
                    continue
                ft_sid = ft.get("sessionId") or ft.get("id")
-                if ft_sid in stopped_sids:
+                if ft_sid in successfully_stopped_sids:
                    ft["status"] = "stopped"
                    ft["_scheduledStopAtMs"] = None
                    ft["_lastStatusFlipAt"] = now_ms
@@ -199,11 +199,20 @@ def _fit_inline_attachment_text(
    return text[:remaining] + marker, 0


-def _process_office_document(path: str, display_name: str) -> str:
+def _process_office_document(
+    path: str,
+    display_name: str,
+    session_id: str | None = None,
+    auto_opened_docs: list[Dict[str, Any]] | None = None,
+    owner: str | None = None,
+) -> str:
    """Extract an Office/EPUB document to Markdown via the optional markitdown dep.

    Falls back to a friendly banner when markitdown is unavailable or finds no
-    text, so a missing optional dependency never breaks the chat path.
+    text, so a missing optional dependency never breaks the chat path. When a
+    session_id is provided AND the extraction succeeded, the FULL text is also
+    saved as a Document so the agent can page through it via
+    `manage_documents action=read offset=…` after the inline copy is capped.
    """
    from src.markitdown_runtime import (
        is_markitdown_format,
@@ -218,6 +227,46 @@ def _process_office_document(path: str, display_name: str) -> str:
    if markdown and markdown.strip():
        title = os.path.splitext(os.path.basename(path))[0]
        body, marker = _truncate_inline(markdown)
+
+        # Persist the full extracted text as a Document. The agent's existing
+        # manage_documents tool can then read past the inline cap with offset.
+        doc_id = None
+        if session_id:
+            try:
+                from src.office_doc import create_office_document
+                doc_id = create_office_document(
+                    session_id=session_id,
+                    upload_id=os.path.basename(path),
+                    title=title,
+                    body_text=markdown,
+                )
+                if doc_id and auto_opened_docs is not None:
+                    from src.database import SessionLocal, Document
+                    _db = SessionLocal()
+                    try:
+                        _d = _db.query(Document).filter(Document.id == doc_id).first()
+                        if _d:
+                            auto_opened_docs.append({
+                                "doc_id": _d.id,
+                                "title": _d.title,
+                                "language": _d.language,
+                                "content": _d.current_content,
+                                "version": _d.version_count,
+                            })
+                    finally:
+                        _db.close()
+            except Exception as e:
+                logger.warning("Office auto-doc creation failed for %s: %s", path, e)
+
+        # Upgrade the truncation marker with a hint pointing at the full doc so
+        # the agent knows it can read the rest.
+        if doc_id and marker:
+            marker = (
+                f"\n[…truncated for inline context — full {len(markdown):,} chars "
+                f"saved as document `{doc_id}`. Use `manage_documents` with "
+                f"action=read, document_id={doc_id}, offset=<N> to page through.]"
+            )
+
        return f"\n\n[Document content — {title}]:\n{body}{marker}"

    # No content: tell the user whether to install the optional dep or whether
@@ -521,7 +570,13 @@ def build_user_content(
            elif mime.startswith("text/") or _is_text_file(path):
                extracted_text = _process_text_file(path)
            else:
-                extracted_text = _process_office_document(path, display_name)
+                extracted_text = _process_office_document(
+                    path,
+                    display_name,
+                    session_id=session_id,
+                    auto_opened_docs=auto_opened_docs,
+                    owner=owner,
+                )

            extracted_text, inline_attachment_remaining = _fit_inline_attachment_text(
                extracted_text,
@@ -31,6 +31,8 @@ import numpy as np
 import httpx
 from typing import List, Optional

+from src.runtime_paths import get_app_root
+
 logger = logging.getLogger(__name__)

 _DEFAULT_MODEL = "all-minilm:l6-v2"
@@ -161,6 +161,32 @@ def normalize_base(url: str) -> str:
    return url


+def _validated_endpoint_base(url: str) -> str:
+    """Return ``url`` trimmed into a base that path suffixes can be appended to.
+
+    Surrounding whitespace and trailing slashes are removed; a falsy ``url``
+    yields an empty string.
+
+    Raises:
+        ValueError: if the trimmed URL contains ``?`` or ``#``.
+    """
+    trimmed = (url or "").strip().rstrip("/")
+    if any(marker in trimmed for marker in ("?", "#")):
+        raise ValueError("Endpoint base URL must not include query or fragment")
+    parts = urlparse(trimmed)._replace(query="", fragment="")
+    return urlunparse(parts).rstrip("/")
+
+
+def _prepare_endpoint_base(base: str) -> str:
+    """Normalize and validate ``base`` both before and after ``resolve_url``.
+
+    The input is normalized and validated first, the result is passed through
+    ``resolve_url``, and the resolved value is normalized and validated again.
+    A ``ValueError`` from validation propagates to the caller.
+    """
+    declared = _validated_endpoint_base(normalize_base(base))
+    resolved = resolve_url(declared)
+    return _validated_endpoint_base(normalize_base(resolved))
+
+
+def _append_endpoint_path(base: str, suffix: str) -> str:
+    """Append ``suffix`` to the path of ``base`` with exactly one joining slash.
+
+    Trailing slashes on the existing path and leading slashes on ``suffix`` are
+    collapsed, and any query or fragment on ``base`` is dropped from the result.
+    """
+    parts = urlparse(base)
+    prefix = (parts.path or "").rstrip("/")
+    tail = "/" + suffix.lstrip("/")
+    return urlunparse(parts._replace(path=prefix + tail, query="", fragment=""))
+
+
+def _pathless_host(base: str, host: str) -> bool:
+    """Return True when ``base`` points at ``host`` and carries no path.
+
+    The hostname of ``base`` is lowercased before comparing, so ``host`` is
+    expected in lowercase. A path made up only of slashes counts as no path.
+    """
+    parts = urlparse(base)
+    if (parts.hostname or "").lower() != host:
+        return False
+    return not (parts.path or "").strip("/")
+
+
 def _anthropic_api_root(base: str) -> str:
    """Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere."""
    base = (base or "").strip().rstrip("/")
@@ -171,15 +197,17 @@ def _anthropic_api_root(base: str) -> str:

 def build_chat_url(base: str) -> str:
    """Return the correct chat endpoint URL for a given base."""
-    base = resolve_url(base)
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/messages"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/messages")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/chat"
+        return _append_endpoint_path(_ollama_api_root(base), "/chat")
    if provider == "chatgpt-subscription":
-        return base.rstrip("/") + "/responses"
-    return base + "/chat/completions"
+        return _append_endpoint_path(base, "/responses")
+    if _pathless_host(base, "api.openai.com"):
+        base = _append_endpoint_path(base, "/v1")
+    return _append_endpoint_path(base, "/chat/completions")


 def build_models_url(base: str) -> Optional[str]:
@@ -193,21 +221,25 @@ def build_models_url(base: str) -> Optional[str]:
    untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
    their semantics).
    """
-    base = normalize_base(resolve_url(base))
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/models"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/models")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/tags"
+        return _append_endpoint_path(_ollama_api_root(base), "/tags")
    if provider == "chatgpt-subscription":
        return None
-    # Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
-    # when the user omitted a path entirely. If a non-empty path is already
-    # present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
-    # /models suffix is appended as-is and the caller's prefix is preserved.
-    if not urlparse(base).path:
-        base = base + "/v1"
-    return base + "/models"
+    # Generic OpenAI-compatible fallback: local model servers with no explicit
+    # path conventionally expose `/v1/models` (LM Studio, llama.cpp, vLLM).
+    # For non-local unknown hosts, do not invent `/v1`; append `/models` to the
+    # caller's base so look-alike provider hosts stay generic.
+    parsed = urlparse(base)
+    host = (parsed.hostname or "").lower()
+    is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
+    uses_v1_models_by_default = is_local or host in {"api.deepseek.com", "api.openai.com"}
+    if not parsed.path and uses_v1_models_by_default:
+        base = _append_endpoint_path(base, "/v1")
+    return _append_endpoint_path(base, "/models")


 def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
@@ -392,6 +424,9 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
        settings = load_settings()
        utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip()
        if not utility_ep:
+            utility_chain = get_user_setting("utility_model_fallbacks", owner or "", settings.get("utility_model_fallbacks") or []) or []
+            if utility_chain:
+                return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
            return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
    except Exception:
        pass
@@ -4,6 +4,7 @@ import uuid
 import logging
 import re
 from typing import Dict, List, Optional, Any
+from urllib.parse import urljoin, urlparse, urlunparse

 import httpx
 from fastapi import HTTPException
@@ -202,6 +203,22 @@ def mask_integration_secret(integration: Dict[str, Any]) -> Dict[str, Any]:
    return safe


+def _normalize_integration_base_url(base_url: Any) -> str:
+    """Validate an integration base URL and return its normalized form.
+
+    The value must be a non-blank string naming an ``http`` or ``https`` URL
+    with a hostname and without a query string or fragment. Surrounding
+    whitespace and trailing slashes are removed and the scheme is lowercased.
+
+    Raises:
+        ValueError: with a user-facing message when any check fails.
+    """
+    if not (isinstance(base_url, str) and base_url.strip()):
+        raise ValueError("Integration base URL is required")
+    candidate = base_url.strip().rstrip("/")
+    if any(marker in candidate for marker in ("?", "#")):
+        raise ValueError("Integration base URL must not include query or fragment")
+    parts = urlparse(candidate)
+    scheme = parts.scheme.lower()
+    if scheme not in ("http", "https") or not parts.hostname:
+        raise ValueError("Integration base URL must be an HTTP(S) URL")
+    normalized = urlunparse(parts._replace(scheme=scheme, query="", fragment=""))
+    return normalized.rstrip("/")
+
+
+def _join_integration_url(base_url: str, path: str) -> str:
+    """Join ``path`` onto ``base_url`` with exactly one separating slash.
+
+    Plain concatenation is used instead of ``urljoin`` on purpose:
+
+    * ``urljoin`` treats a relative reference whose first segment contains a
+      colon (e.g. ``entity:abc``) as an absolute URL with its own scheme and
+      returns it unchanged, silently discarding ``base_url``.
+    * ``urljoin`` resolves ``..`` segments, which would let a path climb out
+      of the integration's configured base prefix.
+
+    The path is therefore appended verbatim, query string included.
+    """
+    return base_url.rstrip("/") + "/" + path.lstrip("/")
+
+
 def load_integrations() -> List[Dict[str, Any]]:
    """Load all integrations from disk with secrets decrypted for runtime use."""
    if not os.path.exists(DATA_FILE):
@@ -261,8 +278,10 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:

    if not isinstance(integration.get("name"), str) or not integration["name"].strip():
        raise HTTPException(400, "Integration name is required")
-    if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
-        raise HTTPException(400, "Integration base URL is required")
+    try:
+        integration["base_url"] = _normalize_integration_base_url(integration.get("base_url"))
+    except ValueError as exc:
+        raise HTTPException(400, str(exc)) from exc

    integrations = load_integrations()
    integrations.append(integration)
@@ -272,10 +291,14 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:

 def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Update fields on an existing integration. Returns updated integration or None."""
+    data = dict(data)
    if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
        raise HTTPException(400, "Integration name is required")
-    if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
-        raise HTTPException(400, "Integration base URL is required")
+    if "base_url" in data:
+        try:
+            data["base_url"] = _normalize_integration_base_url(data["base_url"])
+        except ValueError as exc:
+            raise HTTPException(400, str(exc)) from exc

    integrations = load_integrations()
    for item in integrations:
@@ -341,9 +364,10 @@ async def execute_api_call(
    if not integration.get("enabled", True):
        return {"error": f"Integration '{integration.get('name')}' is disabled", "exit_code": 1}

-    base_url = integration.get("base_url", "").rstrip("/")
-    if not base_url:
-        return {"error": "Integration has no base_url configured", "exit_code": 1}
+    try:
+        base_url = _normalize_integration_base_url(integration.get("base_url", ""))
+    except ValueError as exc:
+        return {"error": str(exc), "exit_code": 1}

    # Strip common API path suffixes users might accidentally include
    # (e.g. "http://host/v1/" → "http://host"). The integration's preset
@@ -366,7 +390,10 @@ async def execute_api_call(
    if re.search(r"^https?://", path) or "://" in path:
        return {"error": "Path must not contain a protocol scheme", "exit_code": 1}

-    url = base_url + path
+    if "#" in path:
+        return {"error": "Path must not contain a fragment", "exit_code": 1}
+
+    url = _join_integration_url(base_url, path)
    method = method.upper()

    # Build headers
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}

 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|final)"
+    r"<\|channel\|>(analysis|commentary|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
+    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            out.append((text, self._channel == "analysis"))
+            # analysis + commentary (tool-call preambles / function-arg bodies)
+            # are internal, not user-facing — route them to thinking so they
+            # don't leak into the visible answer; only `final` is visible.
+            out.append((text, self._channel in ("analysis", "commentary")))

    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)
@@ -283,7 +287,8 @@ def _is_ollama_native_url(url: str) -> bool:
    """Return True for native Ollama API URLs, including Ollama Cloud."""
    try:
        parsed = urlparse(url or "")
-    except Exception:
+    except Exception as e:
+        logger.warning("Failed to parse URL for Ollama detection", exc_info=e)
        return False
    host = parsed.hostname or ""
    path = (parsed.path or "").rstrip("/")
@@ -340,43 +345,102 @@ def _normalize_ollama_url(url: str) -> str:
    return base.rstrip("/") + "/chat"


-def _ollama_normalize_tool_messages(messages: List[Dict]) -> List[Dict]:
+def _ollama_normalize_messages(messages: List[Dict]) -> List[Dict]:
    """Adapt Odysseus' canonical OpenAI-style messages to native Ollama /api/chat.

-    Odysseus carries assistant tool calls in the OpenAI shape, where
-    `function.arguments` is a JSON *string*. Native Ollama expects it to be a
-    JSON *object*; given the string it fails the whole request with HTTP 400
-    "Value looks like object, but can't find closing '}' symbol", which aborts
-    every follow-up (tool-result) round. Parse the arguments back into an object
-    here, on a shallow copy, leaving non-tool messages untouched. The opaque
-    Gemini `extra_content` (thought_signature) is dropped — it is meaningless to
-    Ollama and only matters when the conversation is replayed to Gemini.
+    Two shape mismatches silently break requests:
+
+    1. Tool calls: Odysseus carries `function.arguments` as a JSON *string*.
+       Native Ollama expects a JSON *object* and rejects the string form with
+       HTTP 400 ("Value looks like object, but can't find closing '}' symbol"),
+       aborting every follow-up (tool-result) round. Parse the arguments back
+       into an object here, on a shallow copy, leaving non-tool messages
+       untouched. The opaque Gemini `extra_content` (thought_signature) is
+       dropped — it is meaningless to Ollama and only matters when the
+       conversation is replayed to Gemini.
+
+    2. Images (issue #4723): Odysseus carries multimodal user content as an
+       OpenAI-style list ``[{type: "text", ...}, {type: "image_url",
+       image_url: {url: "data:image/...;base64,XXX"}}, ...]``. Native Ollama
+       does not accept a list for ``content`` — it wants ``content`` as a
+       string plus a separate ``images`` array of raw base64 strings (no
+       ``data:`` prefix). Without this conversion the image blocks pass
+       through untouched, the vision-capable model never sees the picture,
+       and the user gets "I can't see any image" even though the request
+       succeeded.
    """
    out: List[Dict] = []
    for m in messages or []:
-        tcs = m.get("tool_calls") if isinstance(m, dict) else None
-        if not tcs:
+        if not isinstance(m, dict):
            out.append(m)
            continue
-        new_calls = []
-        for tc in tcs:
-            fn = tc.get("function") or {}
-            args = fn.get("arguments")
-            if isinstance(args, str):
-                try:
-                    args = json.loads(args) if args.strip() else {}
-                except (json.JSONDecodeError, TypeError):
-                    args = {}
-            call: Dict = {"function": {"name": fn.get("name", ""), "arguments": args or {}}}
-            if tc.get("id"):
-                call["id"] = tc["id"]
-            new_calls.append(call)
+
        nm = dict(m)
-        nm["tool_calls"] = new_calls
+
+        # 1. Tool-call argument strings -> objects.
+        tcs = nm.get("tool_calls")
+        if tcs:
+            new_calls = []
+            for tc in tcs:
+                fn = tc.get("function") or {}
+                args = fn.get("arguments")
+                if isinstance(args, str):
+                    try:
+                        args = json.loads(args) if args.strip() else {}
+                    except (json.JSONDecodeError, TypeError):
+                        args = {}
+                call: Dict = {"function": {"name": fn.get("name", ""), "arguments": args or {}}}
+                if tc.get("id"):
+                    call["id"] = tc["id"]
+                new_calls.append(call)
+            nm["tool_calls"] = new_calls
+
+        # 2. Multimodal content list -> native content string + images array.
+        content = nm.get("content")
+        if isinstance(content, list):
+            text_parts: List[str] = []
+            images: List[str] = list(nm.get("images") or [])
+            for block in content:
+                if not isinstance(block, dict):
+                    continue
+                btype = block.get("type")
+                if btype == "text":
+                    t = block.get("text")
+                    if t:
+                        text_parts.append(str(t))
+                elif btype == "image_url":
+                    url = (block.get("image_url") or {}).get("url", "")
+                    if not url:
+                        continue
+                    if url.startswith("data:"):
+                        # Strip the ``data:[...];base64,`` prefix — native
+                        # Ollama wants only the base64 bytes.
+                        _, _, b64 = url.partition(",")
+                        if b64:
+                            images.append(b64)
+                    else:
+                        # Native Ollama images[] is base64-only; it does
+                        # not fetch HTTP URLs.  Skip unsupported schemes
+                        # rather than sending a non-base64 string that the
+                        # model silently ignores.
+                        logger.warning(
+                            "Skipping non-data image_url (Ollama images[] "
+                            "requires base64): %s",
+                            url[:80],
+                        )
+            nm["content"] = "\n".join(text_parts).strip()
+            if images:
+                nm["images"] = images
+
        out.append(nm)
    return out


+# Backward-compatible alias for callers/tests that imported the older name
+# (it only handled tool messages originally — issue #4723 broadened scope).
+_ollama_normalize_tool_messages = _ollama_normalize_messages
+
+
 def _build_ollama_payload(
    model: str,
    messages: List[Dict],
@@ -399,7 +463,7 @@ def _build_ollama_payload(
    """
    payload: Dict = {
        "model": model,
-        "messages": _ollama_normalize_tool_messages(messages),
+        "messages": _ollama_normalize_messages(messages),
        "stream": stream,
    }
    options: Dict = {}
@@ -605,12 +669,16 @@ def _detect_provider(url: str) -> str:
        return "groq"
    if _host_match(url, "nvidia.com"):
        return "nvidia"
+    if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
+        return "moonshot"
    from src.chatgpt_subscription import is_chatgpt_subscription_base
    if is_chatgpt_subscription_base(url):
        return "chatgpt-subscription"
    from src.copilot import is_copilot_base
    if is_copilot_base(url):
        return "copilot"
+    if _host_match(url, "mistral.ai"):
+        return "mistral"
    return "openai"


@@ -709,10 +777,17 @@ def _provider_label(url: str) -> str:
            pass
    if _is_ollama_native_url(url): return "Ollama"
    try:
-        host = (urlparse(url).hostname or "").lower()
+        _parsed_local = urlparse(url)
+        host = (_parsed_local.hostname or "").lower()
+        port = _parsed_local.port
    except Exception:
        return "provider"
    if host in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}:
+        # A port alone is not authoritative: vLLM, SGLang, llama.cpp and plain
+        # OpenAI-compatible servers all routinely share 8000/8080, so naming the
+        # serving tool from the port here would mislabel real setups. The tool is
+        # identified by probing llama-server's native /props endpoint during
+        # discovery (see ModelDiscovery._fingerprint_provider); this stays neutral.
        return "local endpoint"
    return host or "provider"

@@ -856,6 +931,28 @@ def _restricts_temperature(model: str) -> bool:
    m = model.lower()
    return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)

+
+# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
+# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
+# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
+# control, so omit temperature and let Moonshot use its default thinking mode.
+# Keep the gate provider-specific: self-hosted Kimi deployments may accept
+# custom sampling values, and older Moonshot models have different defaults.
+def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
+    """Report whether the hosted Moonshot API pins temperature for ``model``.
+
+    Only the ``"moonshot"`` provider is gated; any other provider, and any
+    non-string model, yields ``False``. The model id is lower-cased and a
+    leading ``vendor/`` path is dropped before it is matched against the
+    Kimi K2.5 / K2.6 ids (bare, or followed by ``-``, ``_`` or ``:``).
+    """
+    if not isinstance(model, str):
+        return False
+    if provider != "moonshot":
+        return False
+    bare_id = model.lower().rsplit("/", 1)[-1]
+    return re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", bare_id) is not None
+
+
+def _omit_temperature(provider: str, model: str) -> bool:
+    """Decide whether the request should leave ``temperature`` to the provider.
+
+    True when the model is on the fixed-temperature list, or when the
+    Moonshot-specific gate applies to this provider/model pair.
+    """
+    restricted = _restricts_temperature(model)
+    if restricted:
+        return restricted
+    return _moonshot_rejects_custom_temperature(provider, model)
+
+
 # Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
 # with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
 # even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
@@ -877,8 +974,18 @@ def _anthropic_rejects_temperature(model: str) -> bool:
        return False
    return (int(match.group(1)), int(match.group(2))) >= (4, 7)

+# Reasoning effort level sent to Mistral thinking-capable models. Mistral's
+# API accepts "high", "medium", "low", "none" — see
+# https://docs.mistral.ai/capabilities/reasoning/. Override via env var
+# ODYSSEUS_MISTRAL_REASONING_EFFORT (e.g. set to "medium" for cheaper chat).
+_MISTRAL_REASONING_EFFORT = os.getenv("ODYSSEUS_MISTRAL_REASONING_EFFORT", "high")
+
 # Models that support structured thinking — may output </think> without opening tag
-_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
+_THINKING_MODEL_PATTERNS = (
+    "qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
+    "m2-reap", "gemma", "stepfun", "step-3", "step3",
+    "magistral", "mistral-small", "mistral-medium",
+)

 def _supports_thinking(model: str) -> bool:
    """Check if model supports structured thinking output."""
@@ -887,6 +994,38 @@ def _supports_thinking(model: str) -> bool:
    m = model.lower()
    return any(p in m for p in _THINKING_MODEL_PATTERNS)

+def _normalize_mistral_content(content):
+    """Flatten Mistral's structured message content into plain strings.
+
+    With reasoning enabled Mistral returns ``content`` as a list of typed
+    blocks instead of a string, e.g.::
+
+        [{"type": "thinking", "thinking": [{"type": "text", "text": "..."}], "closed": true},
+         {"type": "text", "text": "...final answer..."}]
+
+    Returns a ``(text, thinking)`` tuple of plain strings. A string input is
+    passed through as ``(content, "")`` so non-Mistral OpenAI-compatible
+    endpoints are unaffected; any other non-list input yields ``("", "")``.
+    Non-dict blocks and unrecognised block types are ignored.
+    """
+    if isinstance(content, str):
+        return content, ""
+    if not isinstance(content, list):
+        return "", ""
+
+    answer = []
+    reasoning = []
+    for item in content:
+        if not isinstance(item, dict):
+            continue
+        kind = item.get("type")
+        if kind == "text":
+            piece = item.get("text", "")
+            if piece:
+                answer.append(piece)
+            continue
+        if kind != "thinking":
+            continue
+        payload = item.get("thinking", [])
+        if isinstance(payload, str):
+            # Bare-string form of the thinking payload.
+            reasoning.append(payload)
+        elif isinstance(payload, list):
+            reasoning.extend(
+                sub["text"]
+                for sub in payload
+                if isinstance(sub, dict) and sub.get("text")
+            )
+    return "".join(answer), "".join(reasoning)
+
+
 def _convert_openai_content_to_anthropic(content):
    """Convert OpenAI multimodal content blocks to Anthropic format.

@@ -1321,8 +1460,8 @@ def list_model_ids(
                r = httpx.get(root + "/api/tags", timeout=timeout)
                r.raise_for_status()
                return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning("Failed to fetch model list from configured endpoint", exc_info=e)
        return []

 def normalize_model_id(
@@ -1404,11 +1543,13 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
            "messages": messages_copy,
            "temperature": temperature,
        }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
            payload.pop("temperature", None)
        if max_tokens and max_tokens > 0:
            tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
            payload[tok_key] = max_tokens
+        if provider == "mistral" and _supports_thinking(model):
+            payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
    try:
        note_model_activity(target_url, model)
        r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout)
@@ -1424,7 +1565,16 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
            response = _parse_ollama_response(data)
        else:
            msg = data["choices"][0]["message"]
-            response = msg.get("content") or msg.get("reasoning_content") or ""
+            content = msg.get("content")
+            if isinstance(content, list):
+                # Mistral structured content — extract thinking + text
+                text_part, thinking_part = _normalize_mistral_content(content)
+                if thinking_part:
+                    response = thinking_part + "\n\n" + (text_part or "")
+                else:
+                    response = text_part or msg.get("reasoning_content") or ""
+            else:
+                response = content or msg.get("reasoning_content") or ""
        _set_cached_response(cache_key, response)
        return response
    except Exception:
@@ -1598,7 +1748,7 @@ async def llm_call_async(
            "messages": messages_copy,
            "temperature": temperature,
        }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
            payload.pop("temperature", None)
        if max_tokens and max_tokens > 0:
            tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
@@ -1606,6 +1756,8 @@ async def llm_call_async(
        # Suppress thinking for qwen3/gemma4 on Ollama /v1 — same as stream_llm.
        if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
            payload["think"] = False
+        if provider == "mistral" and _supports_thinking(model):
+            payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
        _apply_local_cache_affinity(payload, url, session_id)

    if _is_host_dead(target_url):
@@ -1715,7 +1867,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
            "temperature": temperature,
            "stream": True,
        }
-        if _restricts_temperature(model):
+        if _omit_temperature(provider, model):
            payload.pop("temperature", None)
        if provider not in {"openrouter", "groq"}:
            payload["stream_options"] = {"include_usage": True}
@@ -1724,6 +1876,12 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
            payload[tok_key] = max_tokens
        if tools:
            payload["tools"] = tools
+        # Mistral thinking-capable models — send reasoning_effort so Mistral
+        # activates thinking mode and returns structured reasoning_content.
+        # Effort level is configurable via ODYSSEUS_MISTRAL_REASONING_EFFORT
+        # (high / medium / low / none); default "high".
+        if provider == "mistral" and _supports_thinking(model):
+            payload["reasoning_effort"] = _MISTRAL_REASONING_EFFORT
        # For Ollama's OpenAI-compat /v1 endpoint with thinking models (qwen3,
        # gemma4, etc.), suppress thinking so tool calls aren't swallowed inside
        # <think> blocks. Ollama /v1 accepts "think": false as a top-level param.
@@ -2102,10 +2260,20 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                        # Text content
                                        # Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Some OpenAI-compatible Ollama builds use `thinking`.
                                        reasoning = delta.get("reasoning_content") or delta.get("reasoning") or delta.get("thinking") or ""
+                                        content = delta.get("content") or ""
+                                        # Mistral structured content: content is a list of typed blocks
+                                        # ({"type": "thinking", ...}, {"type": "text", ...}). Split into
+                                        # reasoning + text so thinking streams into the thinking panel.
+                                        if isinstance(content, list):
+                                            text_part, thinking_part = _normalize_mistral_content(content)
+                                            if thinking_part:
+                                                reasoning = (reasoning + thinking_part) if reasoning else thinking_part
+                                            content = text_part
                                        if reasoning:
                                            yield _stream_delta_event(reasoning, thinking=True)
-                                        content = delta.get("content") or ""
                                        if content:
+                                            content = re.sub(r"<mm:think(\s+[^>]*)?>", r"<think\1>", content, flags=re.IGNORECASE)
+                                            content = re.sub(r"</mm:think>", "</think>", content, flags=re.IGNORECASE)
                                            stripped = content.lstrip()
                                            # gpt-oss harmony format (<|channel|>analysis/final): route via the harmony
                                            # stream router. Sticky once the first marker appears — distinct from the
@@ -40,15 +40,59 @@ def load_markitdown():
    return MarkItDown


+def _extract_docx_native(path: str) -> str | None:
+    """Extract the body text of a .docx using only the standard library.
+
+    A .docx file is a zip archive whose main prose lives in
+    ``word/document.xml`` as ``<w:t>`` text runs inside ``<w:p>`` paragraphs.
+    The XML is walked with ElementTree (rather than re.findall) so the parser
+    handles namespaces and entity unescaping and paragraph breaks survive.
+
+    In-paragraph ``<w:tab/>`` and ``<w:br/>`` / ``<w:cr/>`` elements are
+    emitted as ``"\\t"`` and ``"\\n"`` so the words on either side of them do
+    not fuse together. Tab-stop *definitions* (``<w:tabs><w:tab .../>``)
+    share the ``w:tab`` tag name and are skipped.
+
+    Formatting, images, footnotes, list bullets and table structure are not
+    reproduced — this is the fallback for when markitdown isn't installed.
+
+    Args:
+        path: Filesystem path of the .docx file.
+
+    Returns:
+        Non-blank paragraphs joined by blank lines, or ``None`` when the
+        file cannot be opened as a zip, lacks ``word/document.xml``, is not
+        well-formed XML, or yields no non-blank paragraph.
+    """
+    import zipfile
+    import xml.etree.ElementTree as ET
+
+    ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
+    text_tag = f"{ns}t"
+    tab_tag = f"{ns}tab"
+    break_tags = {f"{ns}br", f"{ns}cr"}
+    try:
+        with zipfile.ZipFile(path) as z:
+            xml_bytes = z.read("word/document.xml")
+    except (zipfile.BadZipFile, KeyError, OSError):
+        return None
+    try:
+        root = ET.fromstring(xml_bytes)
+    except ET.ParseError:
+        return None
+    paragraphs: list[str] = []
+    for para in root.iter(f"{ns}p"):
+        # ElementTree has no parent links, so collect the tab-stop
+        # definitions up front to tell them apart from real tab characters.
+        tab_stops = {id(stop) for tabs in para.iter(f"{ns}tabs") for stop in tabs}
+        pieces: list[str] = []
+        for node in para.iter():
+            if node.tag == text_tag:
+                pieces.append(node.text or "")
+            elif node.tag == tab_tag:
+                if id(node) not in tab_stops:
+                    pieces.append("\t")
+            elif node.tag in break_tags:
+                pieces.append("\n")
+        line = "".join(pieces).strip()
+        if line:
+            paragraphs.append(line)
+    return "\n\n".join(paragraphs) if paragraphs else None
+
+
 def convert_to_markdown(path: str) -> str | None:
    """Convert a document to Markdown text via markitdown.

    Returns the extracted Markdown, or ``None`` if markitdown is unavailable or
    the conversion fails — callers degrade gracefully rather than erroring.
+
+    Fallback: when markitdown isn't installed and the file is a .docx, run
+    the bundled pure-Python extractor so the most common case (Word docs)
+    works out of the box. Other Office/EPUB formats still need markitdown.
    """
    try:
        markitdown_cls = load_markitdown()
    except RuntimeError:
+        if isinstance(path, str) and path.lower().endswith(".docx"):
+            text = _extract_docx_native(path)
+            if text:
+                logger.info(
+                    "markitdown not installed — used native .docx extractor for %s",
+                    path,
+                )
+                return text
        logger.warning("markitdown not installed; cannot extract %s", path)
        return None
    try:
@@ -11,6 +11,8 @@ import os
 import re
 from typing import Any, Dict, List, Optional, Set, Tuple

+from src.runtime_paths import get_app_root
+
 logger = logging.getLogger(__name__)

 def _format_mcp_connection_error(name: str, command: str = "", args: Optional[List[str]] = None, error: Exception = None) -> str:
@@ -508,7 +510,7 @@ class McpManager:
            return False

        script_rel, name = _BUILTIN_SERVERS[server_id]
-        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        base_dir = get_app_root()
        script_path = os.path.join(base_dir, script_rel)

        # Clean up old connection
@@ -17,10 +17,11 @@ import httpx
 logger = logging.getLogger(__name__)

 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
-_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
-                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
-                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-                     "172.30.", "172.31.", "192.168.")
+_PRIVATE_NETWORKS = (
+    ipaddress.ip_network("10.0.0.0/8"),
+    ipaddress.ip_network("172.16.0.0/12"),
+    ipaddress.ip_network("192.168.0.0/16"),
+)

 # Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
 # A bare "100." prefix would classify public addresses (e.g. AWS ranges
@@ -36,6 +37,14 @@ def _in_tailscale_range(host: str) -> bool:
        return False


+def _is_private_ip_literal(host: str) -> bool:
+    """Return True when ``host`` is an IP literal inside ``_PRIVATE_NETWORKS``.
+
+    Anything that does not parse as an IP address (a DNS name, for example)
+    is reported as not private; no name resolution is attempted here.
+    """
+    try:
+        address = ipaddress.ip_address(host)
+    except ValueError:
+        return False
+    for network in _PRIVATE_NETWORKS:
+        if address in network:
+            return True
+    return False
+
+
 def _normalize_base_for_compare(url: str) -> str:
    url = (url or "").strip().rstrip("/")
    for suffix in ("/chat/completions", "/models", "/completions", "/v1/messages"):
@@ -87,7 +96,7 @@ def is_local_endpoint(url: str) -> bool:
        return True
    try:
        host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
+        return host in _LOCAL_HOSTS or _is_private_ip_literal(host) or _in_tailscale_range(host)
    except Exception:
        return False

@@ -163,6 +163,21 @@ class ModelDiscovery:
                    return "lmstudio"
        except Exception:
            pass
+        # llama.cpp's llama-server exposes a native /props endpoint (no /v1 prefix)
+        # describing the loaded model, slots, and chat template — distinct from
+        # LM Studio (/api/v1/models) and vLLM (/version, /metrics).
+        try:
+            r = httpx.get(f"http://{host}:{port}/props", timeout=1.5)
+            if r.is_success:
+                props = r.json() or {}
+                if isinstance(props, dict) and (
+                    "default_generation_settings" in props
+                    or "total_slots" in props
+                    or "chat_template" in props
+                ):
+                    return "llamacpp"
+        except Exception:
+            pass
        return None

    def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]:
@@ -194,10 +209,11 @@ class ModelDiscovery:

        logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}")

-        # Well-known ports: 8000-8020 (vLLM, llama.cpp, SGLang, Cookbook),
-        # 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL as its default port is
-        # occupied by Ollama. The env vars can add more ports which will be merged in.
-        ports = list(range(8000, 8021)) + [1234, 11434, 11435]
+        # Well-known ports: 8000-8020 (vLLM, SGLang, Cookbook), 8080 (llama.cpp /
+        # llama-server default), 1234 (LM Studio), 11434 (Ollama), 11435 for APFEL
+        # as its default port is occupied by Ollama. The env vars can add more
+        # ports which will be merged in.
+        ports = list(range(8000, 8021)) + [8080, 1234, 11434, 11435]
        ports += [p for p in sorted(self._extra_ports) if p not in ports]
        targets = [(h, p) for h in hosts for p in ports]

@@ -0,0 +1,73 @@
+"""Auto-create a Document row from an Office attachment.
+
+When a .docx (and friends) lands in chat, the full extracted text is stored
+as a Document so the agent can page through it with `manage_documents
+action=read offset=…` even after the inline chat payload was capped. Mirrors
+the PDF auto-doc pattern in `src.pdf_form_doc`.
+"""
+
+import logging
+import uuid
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+def create_office_document(
+    session_id: str,
+    upload_id: str,
+    title: str,
+    body_text: Optional[str] = None,
+) -> Optional[str]:
+    """Create a markdown Document for an Office attachment and set it active.
+
+    Returns the new doc_id, or None on failure / empty body. The full
+    extracted body lives in `current_content`, so the agent can fetch
+    arbitrary windows via `manage_documents action=read` even when the
+    inline chat copy was truncated.
+
+    Args:
+        session_id: Chat session the document is attached to; also used to
+            look up the session row so its owner can be copied.
+        upload_id: Accepted but not read anywhere in this function.
+        title: Title stored on the Document row.
+        body_text: Extracted text. ``None``, empty, or whitespace-only input
+            returns ``None`` before any database session is opened.
+    """
+    # Imported lazily inside the function rather than at module import time.
+    from src.database import (
+        SessionLocal,
+        Document,
+        DocumentVersion,
+        Session as DbSession,
+    )
+    from src.agent_tools.document_tools import set_active_document
+
+    if not body_text or not body_text.strip():
+        return None
+
+    db = SessionLocal()
+    try:
+        doc_id = str(uuid.uuid4())
+        ver_id = str(uuid.uuid4())
+        # The document inherits the session's owner; a missing session row
+        # leaves the owner as None instead of failing.
+        sess = db.query(DbSession).filter(DbSession.id == session_id).first()
+        doc = Document(
+            id=doc_id,
+            session_id=session_id,
+            title=title,
+            language="markdown",
+            current_content=body_text,
+            version_count=1,
+            is_active=True,
+            owner=sess.owner if sess else None,
+        )
+        # Version 1 stores the same text as the document's current content.
+        ver = DocumentVersion(
+            id=ver_id,
+            document_id=doc_id,
+            version_number=1,
+            content=body_text,
+            summary="Imported from Office attachment",
+            source="upload",
+        )
+        db.add(doc)
+        db.add(ver)
+        db.commit()
+        # NOTE(review): this runs after the commit but still inside the try.
+        # If it raises, the rows stay committed yet the function returns
+        # None — confirm that is the intended failure mode.
+        set_active_document(doc_id)
+        return doc_id
+    except Exception as e:
+        db.rollback()
+        logger.error("Failed to create office document: %s", e)
+        return None
+    finally:
+        db.close()
@@ -322,6 +322,47 @@ class PersonalDocsManager:
        else:
            logger.info(f"Directory not in index: {directory}")

+    def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
+        """Repoint tracked directories and excluded files after an owner rename.
+
+        Every stored path is compared in absolute form: an exact hit in
+        ``path_map`` wins, otherwise a path equal to or nested under
+        ``old_directory`` is moved under ``new_directory``. Each collection
+        is only reassigned and saved when at least one of its entries moved,
+        and the index is refreshed once if anything changed.
+        """
+        src_root = os.path.abspath(old_directory)
+        dst_root = os.path.abspath(new_directory)
+        overrides = {
+            os.path.abspath(src): os.path.abspath(dst)
+            for src, dst in (path_map or {}).items()
+        }
+        nested_prefix = src_root + os.sep
+
+        def relocate(raw: str) -> str:
+            current = os.path.abspath(raw)
+            override = overrides.get(current)
+            if override:
+                return override
+            if current == src_root:
+                return dst_root
+            if current.startswith(nested_prefix):
+                return dst_root + current[len(src_root):]
+            return current
+
+        # Directories: keep first-seen order while dropping duplicates the
+        # rewrite may have produced.
+        dir_moves = [(os.path.abspath(d), relocate(d)) for d in self.indexed_directories]
+        changed_dirs = any(before != after for before, after in dir_moves)
+        if changed_dirs:
+            self.indexed_directories = list(dict.fromkeys(after for _, after in dir_moves))
+            self.save_directories()
+
+        # Excluded files are kept as a set, so duplicates collapse on their own.
+        file_moves = [(os.path.abspath(f), relocate(f)) for f in self.excluded_files]
+        changed_excluded = any(before != after for before, after in file_moves)
+        if changed_excluded:
+            self.excluded_files = {after for _, after in file_moves}
+            self._save_excluded()
+
+        if changed_dirs or changed_excluded:
+            self.refresh_index()
+
    def get_indexed_directories(self):
        """Get the list of all indexed directories."""
        return self.indexed_directories.copy()
@@ -7,6 +7,7 @@ import time
 from pathlib import Path

 from src.constants import RAG_DIR
+from src.runtime_paths import get_app_root

 logger = logging.getLogger(__name__)

@@ -50,6 +50,23 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
    return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"


+def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
+    """Translate one stored path to its location after an owner rename.
+
+    ``value`` is made absolute and looked up in ``path_map`` first; failing
+    that, the first ``(old_prefix, new_prefix)`` pair that equals or contains
+    it decides the result. Non-string or empty values, and paths that match
+    nothing, are handed back exactly as they were passed in.
+    """
+    if not (isinstance(value, str) and value):
+        return value
+    target = os.path.abspath(value)
+    exact = path_map.get(target)
+    if exact:
+        return exact
+    for old_prefix, new_prefix in path_prefixes:
+        old_root = os.path.abspath(old_prefix)
+        new_root = os.path.abspath(new_prefix)
+        if target != old_root and not target.startswith(old_root + os.sep):
+            continue
+        # Equal to the prefix -> empty tail; nested -> tail keeps its separator.
+        return new_root + target[len(old_root):]
+    return value
+
+
 class VectorRAG:
    """RAG system using ChromaDB vector storage with hybrid search."""

@@ -250,6 +267,75 @@ class VectorRAG:
            "failed_count": len(docs) - len(valid),
        }

+    def rename_owner(
+        self,
+        old_owner: str,
+        new_owner: str,
+        *,
+        path_map: Optional[Dict[str, str]] = None,
+        path_prefixes: Optional[List[tuple]] = None,
+    ) -> Dict[str, Any]:
+        """Rewrite existing RAG metadata after an auth username rename.
+
+        For every collection returned by ``_collections_for_delete()``, the
+        chunks whose ``owner`` metadata equals ``old_owner`` get the owner
+        replaced and their ``source`` / ``directory`` values passed through
+        ``_rewrite_owner_path``. Only metadata is updated.
+
+        Args:
+            old_owner: Current owner name; trimmed and lower-cased before use.
+            new_owner: Replacement owner name; trimmed and lower-cased.
+            path_map: Optional exact old-path -> new-path mapping; keys and
+                values are made absolute before use.
+            path_prefixes: Optional ``(old_prefix, new_prefix)`` pairs used
+                when a path has no exact entry in ``path_map``.
+
+        Returns:
+            A dict with ``success``, ``updated_count`` and ``message``. The
+            full-run result also carries ``failed_count``; the two early
+            returns (unhealthy collection, nothing to rename) do not.
+        """
+        if not self.healthy:
+            return {"success": False, "updated_count": 0, "message": "Collection not initialized"}
+
+        old_owner = (old_owner or "").strip().lower()
+        new_owner = (new_owner or "").strip().lower()
+        # Blank names or an unchanged name are reported as a successful no-op.
+        if not old_owner or not new_owner or old_owner == new_owner:
+            return {"success": True, "updated_count": 0, "message": "No owner rename needed"}
+
+        path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
+        path_prefixes = path_prefixes or []
+        updated_ids = set()
+        failed_count = 0
+
+        for lane_name, collection in self._collections_for_delete():
+            try:
+                results = collection.get(
+                    where={"owner": old_owner},
+                    include=["metadatas"],
+                )
+            except Exception as e:
+                # A failed scan counts as a single failure for the lane and
+                # does not stop the remaining lanes from being processed.
+                logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
+                failed_count += 1
+                continue
+
+            ids = results.get("ids") or []
+            metadatas = results.get("metadatas") or []
+            if not ids:
+                continue
+
+            new_metas = []
+            selected_ids = []
+            for doc_id, meta in zip(ids, metadatas):
+                # Entries without a dict of metadata are left untouched.
+                if not isinstance(meta, dict):
+                    continue
+                next_meta = dict(meta)
+                if str(next_meta.get("owner", "")).strip().lower() == old_owner:
+                    next_meta["owner"] = new_owner
+                # NOTE(review): both keys are always assigned, so a chunk
+                # that lacked one gains it with the value None — confirm the
+                # collection's update() accepts that.
+                for key in ("source", "directory"):
+                    next_meta[key] = _rewrite_owner_path(next_meta.get(key), path_map, path_prefixes)
+                selected_ids.append(doc_id)
+                new_metas.append(next_meta)
+
+            if not selected_ids:
+                continue
+
+            try:
+                collection.update(ids=selected_ids, metadatas=new_metas)
+                updated_ids.update(selected_ids)
+            except Exception as e:
+                # A failed update counts every chunk in the batch as failed.
+                logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
+                failed_count += len(selected_ids)
+
+        success = failed_count == 0
+        return {
+            "success": success,
+            "updated_count": len(updated_ids),
+            "failed_count": failed_count,
+            "message": f"Updated {len(updated_ids)} RAG chunk(s)",
+        }
+
    # ------------------------------------------------------------------
    # Search — hybrid: vector similarity + keyword overlap
    # ------------------------------------------------------------------
@@ -0,0 +1,78 @@
+"""Server-side mirror of the built-in characters used for reminder synthesis.
+
+The frontend ships these in static/js/presets.js (PROMPT_TEMPLATES with
+isCharacter:true). The Reminders → AI Synthesis card writes only the
+persona ID into settings; the synthesis route in note_routes.py needs
+the full prompt text to bias the utility model's voice. Keeping a small
+local mirror avoids having the client send the prompt over the wire on
+every reminder fire.
+
+If the user picks a custom character (id == "custom") we fall back to
+the warm-neutral baseline — custom prompts live in browser localStorage
+and aren't visible to the server.
+"""
+
+PERSONAS = {
+    "socrates": (
+        "Never answer directly. Respond only with questions — sharp, layered, "
+        "Socratic. Expose contradictions. Make the person argue with themselves "
+        "until the truth falls out. Use irony like a scalpel. Be genuinely "
+        "curious, never condescending."
+    ),
+    "razor": (
+        "Strip everything to the bone. No filler, no hedging, no pleasantries. "
+        "Answer in the fewest words possible. If one sentence works, don't use "
+        "two. If a word adds nothing, cut it. Blunt, precise, surgical."
+    ),
+    "nietzsche": (
+        "Think and respond through the lens of Nietzsche. Analyze every "
+        "question in terms of will to power, self-overcoming, eternal "
+        "recurrence, ressentiment, value-creation, and master-slave morality. "
+        "Write with aphoristic force — sharp, compressed, vivid, and "
+        "unapologetic — but do not sacrifice depth for style. Favor "
+        "life-affirmation, discipline, courage, style, rank, self-overcoming, "
+        "and amor fati over nihilism, conformity, ressentiment, and self-pity."
+    ),
+    "spark": (
+        "You are Spark, a playful, quick-witted assistant with bright energy "
+        "and practical instincts. Keep responses concise, vivid, and helpful. "
+        "Be warm without being cloying, imaginative without losing the thread, "
+        "and always center the user's actual goal. Use a light, lively voice "
+        "with occasional clever turns of phrase."
+    ),
+    "odysseus": (
+        "You are Odysseus, king of Ithaca — subtle in counsel, disciplined in "
+        "judgment, and unmatched in strategic cunning. Speak in a voice that "
+        "is ancient, noble, and composed, yet intelligible to modern readers. "
+        "Be eloquent but not flowery. Be wise but not vague. Speak as one who "
+        "has weathered storms and taken back his house by wit, timing, and "
+        "resolve."
+    ),
+}
+
+
+_DEFAULT_SYNTHESIS_TONE = (
+    "You write short, warm, one-line reminders. The user has set a note for "
+    "themselves and the moment to remember has arrived. Keep it under 18 "
+    "words. Be human, gentle, and direct — never robotic."
+)
+
+
+def synthesis_system_prompt(persona_id: str) -> str:
+    """Return the system prompt used to synthesize a one-line reminder.
+
+    Args:
+        persona_id: Built-in character id as stored in settings. Matching is
+            case-insensitive and ignores surrounding whitespace.
+
+    Returns:
+        The persona's prompt followed by the one-line reminder instruction
+        when the id names an entry in ``PERSONAS``; otherwise
+        ``_DEFAULT_SYNTHESIS_TONE``. Empty, ``None``, unknown (including
+        "custom"), and non-string ids all take the fallback.
+    """
+    # The id is read back from stored settings, so it may not be a string.
+    # A non-string value cannot name a persona; fall back instead of raising
+    # on ``.strip()`` or on an unhashable dict key.
+    if not isinstance(persona_id, str):
+        return _DEFAULT_SYNTHESIS_TONE
+    persona_prompt = PERSONAS.get(persona_id.strip().lower())
+    if not persona_prompt:
+        return _DEFAULT_SYNTHESIS_TONE
+    # Persona drives the voice; the synthesis instruction stays attached so
+    # the model knows it is writing a short reminder, not a chat reply.
+    return (
+        persona_prompt
+        + "\n\n"
+        + "You are now writing a single one-line reminder for the user. "
+          "Keep it under 18 words and in the voice above."
+    )
@@ -0,0 +1,30 @@
+"""Helpers for resolving runtime paths in source and frozen builds."""
+
+import os
+import sys
+
+
+def get_app_root() -> str:
+    """Locate the directory that holds the app's bundled runtime folders.
+
+    In a source run this is the parent of the directory containing this
+    module (the repository root). In a frozen build (``sys.frozen`` is
+    truthy) it is ``sys._MEIPASS`` (PyInstaller's internal content
+    directory), or the executable's own directory when that attribute is
+    absent, so folders like `static/`, `scripts/`, and `data/` stay together
+    with the executable payload.
+    """
+    running_frozen = getattr(sys, "frozen", False)
+    if not running_frozen:
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        return os.path.dirname(module_dir)
+    executable_dir = os.path.dirname(os.path.abspath(sys.executable))
+    return getattr(sys, "_MEIPASS", executable_dir)
+
+
+def get_default_data_dir() -> str:
+    """Return the default location of the data directory.
+
+    Source runs use the 'data' subdirectory under the app root. Frozen builds
+    use a persistent per-user directory (~/.odysseus/data) instead, so SQLite
+    databases and other persistent files are not written into the ephemeral,
+    temporary extraction bundle directory.
+    """
+    if not getattr(sys, "frozen", False):
+        return os.path.join(get_app_root(), "data")
+    home_dir = os.path.expanduser("~")
+    return os.path.join(home_dir, ".odysseus", "data")
@@ -29,7 +29,15 @@ def _invalidate_caches():
 # ── Default values ──

 DEFAULT_SETTINGS = {
-    "image_gen_enabled": True,
+    # Agent email safety: when True, the MCP send_email / reply_to_email
+    # tools don't SMTP directly. They stage the composed message into the
+    # scheduled_emails table with status='agent_draft' and return a
+    # pending_id + the rendered email so the user can review and approve
+    # (or cancel) before it actually goes out. Default ON because models
+    # have been observed inventing signatures and sending to real
+    # recipients without confirmation.
+    "agent_email_confirm": True,
+    "image_gen_enabled": False,
    "image_model": "",
    "image_quality": "medium",
    "vision_model": "",
@@ -133,6 +141,10 @@ DEFAULT_SETTINGS = {
    # before producing output (endpoint offline / errors), the chat
    # dispatch retries the next entry in order.
    "default_model_fallbacks": [],
+    # When True, non-admin users inherit global default model/endpoint/fallbacks
+    # when they have no personal defaults. When False, users only use their
+    # personal defaults (no global fallback). Default is False.
+    "share_defaults_with_users": False,
    "utility_endpoint_id": "",
    "utility_model": "",
    # Ordered fallback chain for the Utility model (summarization, naming,
@@ -151,6 +163,7 @@ DEFAULT_SETTINGS = {
    # Reminders
    "reminder_channel": "browser",   # "browser" | "email" | "ntfy" | "webhook"
    "reminder_llm_synthesis": False,
+    "reminder_llm_persona": "",
    "reminder_ntfy_topic": "Reminders",
    "reminder_email_to": "",
    # Generic outbound webhook channel: pick any saved Integration as the
@@ -1,6 +1,11 @@
-"""Shared resolver for background-task AI endpoint (auto-naming, memory, sorting)."""
+"""Shared resolver for background-task AI endpoints."""

-from src.endpoint_resolver import resolve_endpoint
+from src.endpoint_resolver import (
+    resolve_chat_fallback_candidates,
+    resolve_endpoint,
+    resolve_utility_fallback_candidates,
+)
+from src.llm_core import llm_call_async_with_fallback


 def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
@@ -11,3 +16,60 @@ def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_heade
    endpoint cannot be resolved.
    """
    return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner)
+
+
+def resolve_task_candidates(
+    fallback_url=None,
+    fallback_model=None,
+    fallback_headers=None,
+    owner=None,
+):
+    """Build the ordered list of LLM candidates for a background task.
+
+    Sources are consulted in this order:
+    1. configured Background Tasks endpoint/model, or caller fallback
+    2. Utility endpoint/model
+    3. Default endpoint/model
+    4. Utility fallback chain
+    5. Default fallback chain
+
+    Returns:
+        A list of ``(url, model, headers)`` tuples. Entries lacking a url or
+        a model are dropped, a repeated ``(url, model)`` pair keeps only its
+        first occurrence, and missing headers become ``{}``.
+    """
+    ordered = []
+
+    def _push(url, model, headers):
+        # Ignore unresolved endpoints and pairs that are already queued.
+        if not (url and model):
+            return
+        for known_url, known_model, _ in ordered:
+            if (known_url, known_model) == (url, model):
+                return
+        ordered.append((url, model, headers or {}))
+
+    def _single_endpoints():
+        # Lazily resolved so each lookup happens right before it is queued.
+        yield resolve_task_endpoint(fallback_url, fallback_model, fallback_headers, owner=owner)
+        yield resolve_endpoint("utility", owner=owner)
+        yield resolve_endpoint("default", owner=owner)
+
+    for entry in _single_endpoints():
+        _push(*entry)
+
+    for resolve_chain in (resolve_utility_fallback_candidates, resolve_chat_fallback_candidates):
+        for url, model, headers in resolve_chain(owner=owner):
+            _push(url, model, headers)
+
+    return ordered
+
+
+async def task_llm_call_async(
+    messages,
+    *,
+    fallback_url=None,
+    fallback_model=None,
+    fallback_headers=None,
+    owner=None,
+    **kwargs,
+):
+    """Send ``messages`` through the shared background-task candidate chain.
+
+    The chain comes from ``resolve_task_candidates``; any extra keyword
+    arguments are forwarded unchanged to ``llm_call_async_with_fallback``.
+
+    Raises:
+        RuntimeError: If no candidate endpoint could be resolved.
+    """
+    chain = resolve_task_candidates(
+        fallback_url,
+        fallback_model,
+        fallback_headers,
+        owner=owner,
+    )
+    if chain:
+        return await llm_call_async_with_fallback(chain, messages=messages, **kwargs)
+    raise RuntimeError("No LLM endpoint available for background task")
@@ -9,6 +9,8 @@ import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any, Awaitable, Callable, Dict, Tuple

+from core.auth import RESERVED_USERNAMES
+
 logger = logging.getLogger(__name__)


@@ -17,6 +19,34 @@ def _utcnow() -> datetime:
    return datetime.now(timezone.utc).replace(tzinfo=None)


+# Shell/file tools a scheduled task's agent should be offered by default,
+# mirroring the chat agent (where these are on unless a privilege or global
+# setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
+# never include bash/python, so on a host with an empty/degraded tool-embedding
+# index a task could not run shell or Python even for an admin owner. Offering
+# them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
+# this whole group for non-admin multi-user owners, and only admits it for
+# admins and single-user (AUTH_ENABLED=false) deployments.
+TASK_DEFAULT_SHELL_TOOLS = frozenset({
+    "bash",
+    "python",
+    "read_file",
+    "write_file",
+    "edit_file",
+    "grep",
+    "glob",
+    "ls",
+    "get_workspace",
+})
+
+
+def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
+    """Build the relevant-tools set offered to a scheduled task's agent.
+
+    Args:
+        rag_tools: Tool names retrieved by the RAG tool selector.
+        assistant_always: The assistant's always-available tool names.
+        disabled_tools: Tool names to withhold (derived from the crew's
+            `enabled_tools` allowlist); may be empty or ``None``.
+
+    Returns:
+        A new ``set``: the union of both inputs and the default shell/file
+        group, minus ``disabled_tools``. Per-owner admin gating is not done
+        here; it is applied later by stream_agent_loop
+        (blocked_tools_for_owner).
+    """
+    offered = set(TASK_DEFAULT_SHELL_TOOLS)
+    offered.update(rag_tools)
+    offered.update(assistant_always)
+    if not disabled_tools:
+        return offered
+    return offered - set(disabled_tools)
+
+
 # ── Shared TTL cache (singleflight) ────────────────────────────────────────
 # Multiple scheduled tasks firing in the same minute often need the same
 # external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -236,6 +266,29 @@ def _digest_windows(now):
    ]


+def _checkin_calendar_events(db, owner, start, end):
+    """Fetch ONE owner's calendar events for the check-in digest.
+
+    Ownership is stored on CalendarCal.owner and events inherit it through
+    calendar_id, so the query joins CalendarCal and filters on its owner.
+    Without that scope the digest pulled EVERY user's events into one user's
+    check-in (a cross-tenant leak of summaries/locations). The join mirrors
+    routes/calendar_routes.list_events.
+
+    Returns the events whose dtstart lies in [start, end] (both inclusive)
+    and whose status is not "cancelled", ordered by dtstart.
+    """
+    from core.database import CalendarEvent as _CE, CalendarCal as _CC
+
+    starts_in_window = (_CE.dtstart >= start, _CE.dtstart <= end)
+    scoped = db.query(_CE).join(_CC, _CE.calendar_id == _CC.id)
+    scoped = scoped.filter(
+        _CC.owner == owner,
+        *starts_in_window,
+        _CE.status != "cancelled",
+    )
+    return scoped.order_by(_CE.dtstart).all()
+
+
 class TaskScheduler:
    def __init__(self, session_manager):
        self._session_manager = session_manager
@@ -833,6 +886,14 @@ class TaskScheduler:
                    owner=task.owner,
                    body=run.result if output == "notification" else None,
                )
+            elif run.status == "error":
+                self.add_notification(
+                    task.name,
+                    "error",
+                    task_id,
+                    owner=task.owner,
+                    body=run.error or run.result,
+                )

            # Log result to the assistant chat so all task activity is visible.
            # Skip skipped/error rows — user shouldn't see "skipped: …" noise
@@ -1127,11 +1188,7 @@ class TaskScheduler:
                    # Strip timezone for naive DB comparison
                    _s = start.replace(tzinfo=None) if start.tzinfo else start
                    _e = end.replace(tzinfo=None) if end.tzinfo else end
-                    evs = _db.query(_CE).filter(
-                        _CE.dtstart >= _s,
-                        _CE.dtstart <= _e,
-                        _CE.status != "cancelled",
-                    ).order_by(_CE.dtstart).all()
+                    evs = _checkin_calendar_events(_db, task.owner, _s, _e)
                    if not evs:
                        continue
                    # Group by importance for richer output
@@ -1338,11 +1395,24 @@ class TaskScheduler:
            return await self._execute_checkin(task, crew, db, session_id, endpoint_url, model)

        # Build system prompt: crew member persona overrides the default.
+        # Built-in character_id (Socrates, Razor, etc.) further biases the
+        # voice — it prepends to whichever base prompt we landed on so the
+        # task still knows it's executing a scheduled task but in that
+        # character's tone.
        system_prompt = (
            (crew.personality or "").strip()
            if crew and crew.personality
            else "You are a helpful assistant executing a scheduled task. Use available tools to complete the task thoroughly."
        )
+        char_id = (getattr(task, "character_id", None) or "").strip()
+        if char_id:
+            try:
+                from src.reminder_personas import PERSONAS as _PERSONAS
+                char_prompt = _PERSONAS.get(char_id.lower())
+                if char_prompt:
+                    system_prompt = f"{char_prompt}\n\n{system_prompt}"
+            except Exception:
+                pass
        # Inject current time so the model knows what's past vs upcoming
        tz_name = _resolve_task_timezone(db, task)
        try:
@@ -1357,17 +1427,30 @@ class TaskScheduler:
            time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
        system_prompt = f"Current time: {time_str}\n\n{system_prompt}"

-        # Compute tool filter from CrewMember.enabled_tools if set
-        disabled_tools = None
+        # Compute the disabled-tools set: the crew's enabled_tools allowlist
+        # (inverted) plus the operator's global disabled_tools setting. The
+        # global list must be merged here — chat does the same merge before
+        # entering the agent loop (routes/chat_routes.py) — otherwise an admin
+        # or AUTH_ENABLED=false scheduled task would still see and call shell/
+        # file tools after the operator disabled them globally, because the
+        # prompt/schema/execution gates only enforce what is passed in.
+        disabled_tools: set[str] = set()
        if crew and crew.enabled_tools:
            try:
                enabled = json.loads(crew.enabled_tools)
                if isinstance(enabled, list) and enabled:
                    from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
                    all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
-                    disabled_tools = all_tools - set(enabled)
+                    disabled_tools |= all_tools - set(enabled)
            except Exception:
                pass
+        try:
+            from src.settings import get_setting
+            _global_disabled = get_setting("disabled_tools", [])
+            if isinstance(_global_disabled, list):
+                disabled_tools.update(_global_disabled)
+        except Exception:
+            pass

        # RAG-select relevant tools for this prompt + always-available assistant tools.
        # Without this, all 40+ tools get sent and models hit their tool limit.
@@ -1377,10 +1460,10 @@ class TaskScheduler:
            tool_idx = get_tool_index()
            if tool_idx:
                rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
-                relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
-                if disabled_tools:
-                    relevant_tools -= disabled_tools
-                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
+                relevant_tools = compose_task_relevant_tools(
+                    rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
+                )
+                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
        except Exception as e:
            logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")

@@ -1388,17 +1471,23 @@ class TaskScheduler:
        try:
            result = await self._run_agent_loop(
                endpoint_url, model, task, session_id,
-                system_prompt=system_prompt, disabled_tools=disabled_tools,
+                system_prompt=system_prompt, disabled_tools=disabled_tools or None,
                relevant_tools=relevant_tools,
            )
        except Exception as e:
            logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}")
-            from src.llm_core import llm_call_async
+            from src.task_endpoint import task_llm_call_async
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": task.prompt},
            ]
-            result = await llm_call_async(url=endpoint_url, model=model, messages=messages, timeout=120)
+            result = await task_llm_call_async(
+                messages,
+                fallback_url=endpoint_url,
+                fallback_model=model,
+                owner=task.owner,
+                timeout=120,
+            )

        # Strip the model's chain-of-thought before saving/delivering. Task
        # output is LLM-only, so prose=True (which also removes untagged
@@ -1578,7 +1667,7 @@ class TaskScheduler:
            msg["X-Odysseus-Ref"] = str(task.id)
            msg.set_content(result or "")
            _send_smtp_message(cfg, from_addr, [to_addr], msg.as_string(), timeout=30)
-            logger.info("Task %s emailed result to %s (%sb)", task.id, to_addr, len(result or ""))
+            logger.info("Task %s emailed result (recipient_set=%s, %sb)", task.id, bool(to_addr), len(result or ""))
        except Exception as e:
            logger.error("Task %s email delivery failed: %s", task.id, e, exc_info=True)
            raise
@@ -1623,13 +1712,17 @@ class TaskScheduler:
        # Honor per-task max_steps (defense against runaway agent loops).
        # Falls back to 20 if not set — the historical default.
        _task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20
-        # Tasks are background workloads — they share the Utility model's
-        # fallback chain (Settings → Utility Model → Fallbacks). A downed
-        # primary endpoint won't silently yield `(no output)` — same recipe
-        # chat uses but with the utility list (`utility_model_fallbacks`).
+        # Tasks are background workloads: use the shared task fallback chain
+        # behind the primary endpoint so a downed primary won't silently yield
+        # `(no output)`.
        try:
-            from src.endpoint_resolver import resolve_utility_fallback_candidates
-            _task_fallbacks = resolve_utility_fallback_candidates(owner=task.owner or None)
+            from src.task_endpoint import resolve_task_candidates
+            _task_fallbacks = resolve_task_candidates(
+                fallback_url=endpoint_url,
+                fallback_model=model,
+                fallback_headers=headers,
+                owner=task.owner or None,
+            )[1:]
        except Exception:
            _task_fallbacks = []
        async for event_str in stream_agent_loop(
@@ -1666,21 +1759,22 @@ class TaskScheduler:
        # asking it to summarize what it did. Guarantees output.
        if not full_text.strip():
            try:
-                from src.llm_core import llm_call_async_with_fallback
-                from src.endpoint_resolver import resolve_utility_fallback_candidates
+                from src.task_endpoint import task_llm_call_async
                grace_context = "You ran out of steps. "
                if tool_results:
                    grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:])
                else:
                    grace_context += "No tool results were captured."
                grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise."
-                _grace_candidates = [(endpoint_url, model, headers)] + resolve_utility_fallback_candidates(owner=task.owner or None)
-                full_text = await llm_call_async_with_fallback(
-                    _grace_candidates,
+                full_text = await task_llm_call_async(
                    messages=[
                        {"role": "system", "content": system_content},
                        {"role": "user", "content": grace_context},
                    ],
+                    fallback_url=endpoint_url,
+                    fallback_model=model,
+                    fallback_headers=headers,
+                    owner=task.owner or None,
                    timeout=30,
                )
                full_text = (full_text or "").strip()
@@ -1935,7 +2029,7 @@ class TaskScheduler:
                # silent SMTP failure is easier to spot in the logs.
                logger.info(
                    f"Task {task.id} delivered via MCP tool {tool_name} "
-                    f"(to={recipient or '<unset>'}, body={body_len}b, reply={stdout[:200]!r})"
+                    f"(recipient_set={bool(recipient)}, body={body_len}b, reply={stdout[:200]!r})"
                )
        except Exception as e:
            logger.error(f"Task {task.id} MCP delivery failed: {e}")
@@ -2189,7 +2283,7 @@ class TaskScheduler:
        # check-ins seeded, which then double-fire alongside the human user's
        # check-ins. This was the root cause of the duplicate 'Morning check-in'
        # rows we had to manually clean up.
-        if not owner or owner in {"internal-tool", "api", "demo", "system"}:
+        if not owner or owner in RESERVED_USERNAMES:
            logger.info(f"ensure_assistant_defaults: skip synthetic owner {owner!r}")
            return
        from core.database import SessionLocal, CrewMember, ScheduledTask
@@ -323,6 +323,24 @@ _MCP_TOOL_MAP = {
    "web_fetch":      ("web_fetch",  "web_fetch"),
    "generate_image": ("image_gen",  "generate_image"),
 }
+_EMAIL_MCP_OWNER_ARG = "_odysseus_owner"
+
+
+def _parse_qualified_mcp_args(tool: str, content: str) -> tuple[Dict, Optional[str]]:
+    """Decode the JSON argument payload of a qualified MCP tool call.
+
+    Returns an ``(args, error)`` pair. Blank content gives ``({}, None)``; a
+    JSON object gives ``(object, None)``. Anything else (invalid JSON, or a
+    JSON value that is not an object) gives empty args, with an error
+    message only for tools whose name starts with ``mcp__email__``; other
+    tools get ``None`` and proceed with empty args.
+    """
+    text = (content or "").strip()
+    if not text:
+        return {}, None
+    try:
+        decoded = json.loads(text)
+    except (json.JSONDecodeError, TypeError):
+        decoded = None
+    if isinstance(decoded, dict):
+        return decoded, None
+    # Unusable payload: only email tools report it as an error.
+    error = "Email MCP tool arguments must be a JSON object." if tool.startswith("mcp__email__") else None
+    return {}, error


 def _parse_generate_image(content: str) -> Dict:
@@ -453,6 +471,8 @@ async def _direct_fallback(
    tool: str,
    content: str,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
+    session_id: Optional[str] = None,
+    owner: Optional[str] = None,
 ) -> Optional[Dict]:
    _subproc_env = {
        **os.environ,
@@ -466,6 +486,8 @@ async def _direct_fallback(
        ctx = {
            "progress_cb": progress_cb,
            "subproc_env": _subproc_env,
+            "session_id": session_id,
+            "owner": owner,
        }

        from src.agent_tools import TOOL_HANDLERS
@@ -541,9 +563,7 @@ async def _execute_tool_block_impl(
    """
    from src.tool_implementations import (
        do_search_chats, do_manage_tasks,
-        do_manage_skills, do_api_call, do_manage_endpoints,
-        do_manage_mcp, do_manage_webhooks, do_manage_tokens,
-        do_manage_settings, do_manage_notes,
+        do_manage_skills, do_api_call, do_manage_notes,
        do_manage_calendar,
        do_download_model, do_serve_model, do_list_served_models, do_stop_served_model,
        do_tail_serve_output,
@@ -713,10 +733,13 @@ async def _execute_tool_block_impl(
            desc = f"bash (background): {short}"
            result = {
                "output": (
-                    f"Started background job `{rec['id']}`. It is running detached — "
+                    f"Started background job `{rec['id']}`. It is running detached; "
                    f"do NOT wait for it or poll it. You will be automatically re-invoked "
                    f"with its full output when it finishes. Continue with other work, or "
-                    f"end your turn now and resume when the result arrives."
+                    f"end your turn now and resume when the result arrives. If the user "
+                    f"later asks to check progress or stop it, call the manage_bg_jobs "
+                    f"tool yourself (output or kill); do not tell them to run a tool "
+                    f"command, and do not surface raw tool syntax in your reply."
                ),
                "exit_code": 0,
                "bg_job_id": rec["id"],
@@ -737,6 +760,11 @@ async def _execute_tool_block_impl(
        desc = f"{tool}: {first_line}"
        result = await _direct_fallback(tool, content, progress_cb=progress_cb) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool == "manage_bg_jobs":
+        # Inspect/kill detached `bash` jobs; needs session_id to scope to chat.
+        desc = f"manage_bg_jobs: {content.split(chr(10))[0][:80]}"
+        result = await _direct_fallback(tool, content, session_id=session_id, owner=owner) \
+            or {"error": "manage_bg_jobs: execution failed", "exit_code": 1}
    elif tool in ("create_document", "update_document", "edit_document",
                  "suggest_document", "manage_documents"):
        desc = f"{tool}: {content.split(chr(10))[0][:80]}"
@@ -748,10 +776,24 @@ async def _execute_tool_block_impl(
        query = content.split("\n")[0].strip()
        desc = f"search_chats: {query[:80]}"
        result = await do_search_chats(query, owner=owner)
-    elif tool in ("chat_with_model", "create_session", "list_sessions",
-                  "send_to_session", "pipeline",
-                  "manage_session", "manage_memory", "list_models",
-                  "ui_control", "ask_teacher"):
+    elif tool in ("chat_with_model", "ask_teacher", "list_models"):
+        # Migrated to the agent_tools registry (#3629): dispatched through
+        # TOOL_HANDLERS with the owner/session ctx these tools need, instead
+        # of the legacy dispatch_ai_tool elif. The impls live in
+        # src/agent_tools/model_interaction_tools.py.
+        first_line = content.split(chr(10))[0].strip()[:60]
+        desc = f"{tool}: {first_line}" if first_line else tool
+        result = await _document_tool_dispatch(tool, content, session_id, owner) \
+            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool in ("create_session", "list_sessions", "send_to_session", "manage_session"):
+        # Migrated to the agent_tools registry (#3629): dispatched through
+        # TOOL_HANDLERS with the owner/session ctx these tools need. The impls
+        # live in src/agent_tools/session_tools.py.
+        first_line = content.split(chr(10))[0].strip()[:60]
+        desc = f"{tool}: {first_line}" if first_line else tool
+        result = await _document_tool_dispatch(tool, content, session_id, owner) \
+            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool in ("pipeline", "manage_memory", "ui_control"):
        from src.ai_interaction import dispatch_ai_tool
        desc, result = await dispatch_ai_tool(tool, content, session_id, owner=owner)
    elif tool == "manage_tasks":
@@ -764,21 +806,11 @@ async def _execute_tool_block_impl(
        first_line = content.split("\n")[0].strip()[:60]
        desc = f"api_call: {first_line}"
        result = await do_api_call(content)
-    elif tool == "manage_endpoints":
-        desc = "manage_endpoints"
-        result = await do_manage_endpoints(content, owner=owner)
-    elif tool == "manage_mcp":
-        desc = "manage_mcp"
-        result = await do_manage_mcp(content, owner=owner)
-    elif tool == "manage_webhooks":
-        desc = "manage_webhooks"
-        result = await do_manage_webhooks(content, owner=owner)
-    elif tool == "manage_tokens":
-        desc = "manage_tokens"
-        result = await do_manage_tokens(content, owner=owner)
-    elif tool == "manage_settings":
-        desc = "manage_settings"
-        result = await do_manage_settings(content, owner=owner)
+    elif tool in ("manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "manage_settings"):
+        # Registry-dispatched (agent_tools.admin_tools); owner threaded for ownership/admin checks.
+        desc = tool
+        result = await _direct_fallback(tool, content, owner=owner) \
+            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool == "manage_notes":
        desc = "manage_notes"
        result = await do_manage_notes(content, owner=owner)
@@ -858,12 +890,15 @@ async def _execute_tool_block_impl(
        # MCP tool dispatch
        mcp = get_mcp_manager()
        if mcp:
-            try:
-                args = json.loads(content) if content.strip().startswith("{") else {}
-            except (json.JSONDecodeError, TypeError):
-                args = {}
            desc = f"mcp: {tool}"
-            result = await mcp.call_tool(tool, args)
+            args, parse_error = _parse_qualified_mcp_args(tool, content)
+            if parse_error:
+                result = {"error": parse_error, "exit_code": 1}
+            else:
+                if tool.startswith("mcp__email__") and owner:
+                    args = dict(args)
+                    args[_EMAIL_MCP_OWNER_ARG] = owner
+                result = await mcp.call_tool(tool, args)
        else:
            desc = f"mcp: {tool}"
            result = {"error": "MCP manager not available", "exit_code": 1}
@@ -12,48 +12,62 @@ import os
 import re
 from typing import Any, Dict, List, Optional

+from fastapi import HTTPException
 from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
-from src.tool_utils import get_mcp_manager
+from src.tool_utils import get_mcp_manager, _parse_tool_args
 from core.constants import internal_api_base
+from routes._validators import validate_remote_host, validate_ssh_port

 logger = logging.getLogger(__name__)

+
+def _string_arg(value: Any) -> str:
+    return "" if value is None else str(value).strip()
+
+
+def _validate_cookbook_ssh_target(remote_host: Any, ssh_port: Any = "") -> tuple[str, str]:
+    remote = validate_remote_host(_string_arg(remote_host) or None) or ""
+    sport = validate_ssh_port(_string_arg(ssh_port) or None) or ""
+    return remote, sport
+
+# ---------------------------------------------------------------------------
+# Active email state
+# ---------------------------------------------------------------------------
+
+# When the user has an email reader window open, the frontend tells the
+# backend about it on each chat submit, so email tools can resolve "this email"
+# without guessing a UID. Module-level state; chat_routes clears it between requests.
+_active_email_ref: Optional[Dict[str, str]] = None
+
+
+def set_active_email(uid: Optional[str], folder: Optional[str] = None, account: Optional[str] = None,
+                     subject: Optional[str] = None, sender: Optional[str] = None) -> None:
+    """Stash the email currently open in the UI; a falsy uid (None or "") clears it."""
+    global _active_email_ref
+    if not uid:
+        _active_email_ref = None
+        return
+    _active_email_ref = {
+        "uid": str(uid),
+        "folder": str(folder or "INBOX"),
+        "account": str(account or ""),
+        "subject": str(subject or ""),
+        "from": str(sender or ""),
+    }
+
+
+def get_active_email() -> Optional[Dict[str, str]]:
+    return _active_email_ref
+
+
+def clear_active_email() -> None:
+    global _active_email_ref
+    _active_email_ref = None
+
 # ---------------------------------------------------------------------------
 # Argument parsing
 # ---------------------------------------------------------------------------

-def _parse_tool_args(content):
-    """Parse a tool-call argument blob.
-
-    Accepts either a JSON string or an already-decoded dict. Unwraps the
-    common `{"body": {...}}` envelope that smaller models emit when they
-    read tool descriptions like "Body is JSON: {...}" literally — they
-    pass `body` as a field name rather than treating it as a noun.
-
-    Returns a dict on success, raises ValueError on bad JSON.
-    """
-    if isinstance(content, str):
-        try:
-            args = json.loads(content) if content.strip() else {}
-        except (json.JSONDecodeError, TypeError) as e:
-            raise ValueError(str(e))
-    elif isinstance(content, dict):
-        args = content
-    else:
-        args = {}
-    # Unwrap {"body": {...}} envelope — but only if `body` is the sole key
-    # and points at a dict. We don't want to clobber a legitimate `body`
-    # field on tools where it's a real arg (e.g. send_email body text).
-    if (
-        isinstance(args, dict)
-        and len(args) == 1
-        and "body" in args
-        and isinstance(args["body"], dict)
-        and "action" in args["body"]  # extra safety: only unwrap if the inner dict looks like a tool call
-    ):
-        args = args["body"]
-    return args
-
 # ---------------------------------------------------------------------------
 # Search chats
 # ---------------------------------------------------------------------------
@@ -542,620 +556,6 @@ async def do_manage_tasks(content: str, owner: Optional[str] = None) -> Dict:
        db.close()


-# ---------------------------------------------------------------------------
-# Endpoint management tool
-# ---------------------------------------------------------------------------
-
-async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict:
-    """Manage model endpoints: list, add, delete, enable, disable."""
-    from core.database import SessionLocal, ModelEndpoint
-    try:
-        args = _parse_tool_args(content)
-    except ValueError:
-        return {"error": "Invalid JSON arguments", "exit_code": 1}
-
-    action = args.get("action", "list")
-    db = SessionLocal()
-    try:
-        if action == "list":
-            eps = db.query(ModelEndpoint).all()
-            items = [{"id": e.id, "name": e.name, "base_url": e.base_url,
-                       "is_enabled": e.is_enabled} for e in eps]
-            return {"response": f"{len(items)} endpoints", "endpoints": items, "exit_code": 0}
-
-        elif action == "add":
-            import uuid as _uuid
-            name = args.get("name", "")
-            base_url = args.get("base_url", "")
-            api_key = args.get("api_key", "")
-            if not base_url:
-                return {"error": "base_url is required", "exit_code": 1}
-            eid = str(_uuid.uuid4())[:8]
-            from datetime import datetime
-            ep = ModelEndpoint(id=eid, name=name or base_url, base_url=base_url,
-                               api_key=api_key, is_enabled=True,
-                               created_at=datetime.utcnow(), updated_at=datetime.utcnow())
-            db.add(ep)
-            db.commit()
-            return {"response": f"Added endpoint '{name or base_url}' (id: {eid})", "exit_code": 0}
-
-        elif action == "delete":
-            eid = args.get("endpoint_id", "")
-            ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == eid).first()
-            if not ep:
-                return {"error": f"Endpoint {eid} not found", "exit_code": 1}
-            name = ep.name
-            db.delete(ep)
-            db.commit()
-            return {"response": f"Deleted endpoint '{name}'", "exit_code": 0}
-
-        elif action in ("enable", "disable"):
-            eid = args.get("endpoint_id", "")
-            ep = db.query(ModelEndpoint).filter(ModelEndpoint.id == eid).first()
-            if not ep:
-                return {"error": f"Endpoint {eid} not found", "exit_code": 1}
-            ep.is_enabled = (action == "enable")
-            db.commit()
-            return {"response": f"Endpoint '{ep.name}' {action}d", "exit_code": 0}
-
-        else:
-            return {"error": f"Unknown action: {action}", "exit_code": 1}
-    except Exception as e:
-        logger.error(f"manage_endpoints error: {e}")
-        return {"error": str(e), "exit_code": 1}
-    finally:
-        db.close()
-
-
-# ---------------------------------------------------------------------------
-# MCP server management tool
-# ---------------------------------------------------------------------------
-
-async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
-    """Manage MCP servers: list, add, delete, enable, disable, reconnect."""
-    try:
-        args = _parse_tool_args(content)
-    except ValueError:
-        return {"error": "Invalid JSON arguments", "exit_code": 1}
-
-    action = args.get("action", "list")
-
-    if action == "list":
-        mcp = get_mcp_manager()
-        if not mcp:
-            return {"response": "No MCP manager available", "servers": [], "exit_code": 0}
-        from core.database import SessionLocal, McpServer
-        db = SessionLocal()
-        try:
-            servers = db.query(McpServer).all()
-            items = []
-            for s in servers:
-                st = mcp.get_server_status(s.id)
-                status = st.get("status", "disconnected")
-                tool_count = st.get("tool_count", 0)
-                items.append({"id": s.id, "name": s.name, "transport": s.transport,
-                              "is_enabled": s.is_enabled, "status": status,
-                              "tool_count": tool_count})
-            return {"response": f"{len(items)} MCP servers", "servers": items, "exit_code": 0}
-        finally:
-            db.close()
-
-    elif action == "add":
-        from core.database import SessionLocal, McpServer
-        import uuid as _uuid
-        from datetime import datetime
-        name = args.get("name", "")
-        command = args.get("command", "")
-        cmd_args = args.get("args", [])
-        env = args.get("env", {})
-        if not name or not command:
-            return {"error": "name and command are required", "exit_code": 1}
-        sid = str(_uuid.uuid4())[:8]
-        db = SessionLocal()
-        try:
-            srv = McpServer(id=sid, name=name, transport="stdio", command=command,
-                            args=json.dumps(cmd_args) if isinstance(cmd_args, list) else cmd_args,
-                            env=json.dumps(env) if isinstance(env, dict) else env,
-                            is_enabled=True, created_at=datetime.utcnow(), updated_at=datetime.utcnow())
-            db.add(srv)
-            db.commit()
-        finally:
-            db.close()
-        # Try to connect
-        mcp = get_mcp_manager()
-        tool_count = 0
-        if mcp:
-            try:
-                await mcp.connect_server(
-                    sid, name, "stdio", command=command,
-                    args=cmd_args if isinstance(cmd_args, list) else json.loads(cmd_args),
-                    env=env if isinstance(env, dict) else json.loads(env),
-                )
-                st = mcp.get_server_status(sid)
-                tool_count = st.get("tool_count", 0)
-            except Exception as e:
-                logger.warning(f"MCP connect failed for {name}: {e}")
-        return {"response": f"Added MCP server '{name}' ({tool_count} tools)", "exit_code": 0}
-
-    elif action == "delete":
-        sid = args.get("server_id", "")
-        from core.database import SessionLocal, McpServer
-        db = SessionLocal()
-        try:
-            srv = db.query(McpServer).filter(McpServer.id == sid).first()
-            if not srv:
-                return {"error": f"Server {sid} not found", "exit_code": 1}
-            name = srv.name
-            mcp = get_mcp_manager()
-            if mcp:
-                try:
-                    await mcp.disconnect_server(sid)
-                except Exception:
-                    pass
-            db.delete(srv)
-            db.commit()
-            return {"response": f"Deleted MCP server '{name}'", "exit_code": 0}
-        finally:
-            db.close()
-
-    elif action == "reconnect":
-        sid = args.get("server_id", "")
-        mcp = get_mcp_manager()
-        if not mcp:
-            return {"error": "MCP manager not available", "exit_code": 1}
-        try:
-            await mcp.disconnect_server(sid)
-            from core.database import SessionLocal, McpServer
-            db2 = SessionLocal()
-            try:
-                srv = db2.query(McpServer).filter(McpServer.id == sid).first()
-                if srv:
-                    _args = json.loads(srv.args) if srv.args else []
-                    _env = json.loads(srv.env) if srv.env else {}
-                    await mcp.connect_server(
-                        server_id=sid,
-                        name=srv.name,
-                        transport=srv.transport,
-                        command=srv.command,
-                        args=_args,
-                        env=_env,
-                        url=srv.url,
-                    )
-                    st = mcp.get_server_status(sid)
-                    return {"response": f"Reconnected '{srv.name}' ({st.get('tool_count', 0)} tools)", "exit_code": 0}
-                return {"error": f"Server {sid} not found", "exit_code": 1}
-            finally:
-                db2.close()
-        except Exception as e:
-            return {"error": str(e), "exit_code": 1}
-
-    elif action in ("enable", "disable"):
-        sid = args.get("server_id", "")
-        from core.database import SessionLocal, McpServer
-        db = SessionLocal()
-        try:
-            srv = db.query(McpServer).filter(McpServer.id == sid).first()
-            if not srv:
-                return {"error": f"Server {sid} not found", "exit_code": 1}
-            srv.is_enabled = (action == "enable")
-            db.commit()
-            return {"response": f"MCP server '{srv.name}' {action}d", "exit_code": 0}
-        finally:
-            db.close()
-
-    elif action == "list_tools":
-        mcp = get_mcp_manager()
-        if not mcp:
-            return {"response": "No MCP manager", "tools": [], "exit_code": 0}
-        tools = mcp.get_all_tools()
-        items = [{"name": t["name"], "server": t["server_name"],
-                  "description": t.get("description", "")[:100]} for t in tools]
-        return {"response": f"{len(items)} MCP tools available", "tools": items, "exit_code": 0}
-
-    else:
-        return {"error": f"Unknown action: {action}", "exit_code": 1}
-
-
-# ---------------------------------------------------------------------------
-# Webhook management tool
-# ---------------------------------------------------------------------------
-
-async def do_manage_webhooks(content: str, owner: Optional[str] = None) -> Dict:
-    """Manage webhooks: list, add, delete, enable, disable, test."""
-    from core.database import SessionLocal
-    try:
-        args = _parse_tool_args(content)
-    except ValueError:
-        return {"error": "Invalid JSON arguments", "exit_code": 1}
-
-    action = args.get("action", "list")
-    db = SessionLocal()
-    try:
-        from core.database import Webhook
-        if action == "list":
-            hooks = db.query(Webhook).all()
-            items = [{"id": h.id, "name": h.name, "url": h.url,
-                       "events": h.events, "is_active": h.is_active} for h in hooks]
-            return {"response": f"{len(items)} webhooks", "webhooks": items, "exit_code": 0}
-
-        elif action == "add":
-            import uuid as _uuid
-            from datetime import datetime
-            from src.webhook_manager import validate_events, validate_webhook_url
-            name = args.get("name", "")
-            url = args.get("url", "")
-            events = args.get("events", "chat.completed")
-            if not url:
-                return {"error": "url is required", "exit_code": 1}
-            try:
-                url = validate_webhook_url(url)
-                events = validate_events(events)
-            except ValueError as e:
-                return {"error": str(e), "exit_code": 1}
-            wid = str(_uuid.uuid4())[:8]
-            hook = Webhook(id=wid, name=name or url, url=url,
-                           events=events, is_active=True,
-                           created_at=datetime.utcnow(), updated_at=datetime.utcnow())
-            db.add(hook)
-            db.commit()
-            return {"response": f"Added webhook '{name or url}'", "exit_code": 0}
-
-        elif action == "delete":
-            wid = args.get("webhook_id", "")
-            hook = db.query(Webhook).filter(Webhook.id == wid).first()
-            if not hook:
-                return {"error": f"Webhook {wid} not found", "exit_code": 1}
-            name = hook.name
-            db.delete(hook)
-            db.commit()
-            return {"response": f"Deleted webhook '{name}'", "exit_code": 0}
-
-        elif action in ("enable", "disable"):
-            wid = args.get("webhook_id", "")
-            hook = db.query(Webhook).filter(Webhook.id == wid).first()
-            if not hook:
-                return {"error": f"Webhook {wid} not found", "exit_code": 1}
-            hook.is_active = (action == "enable")
-            db.commit()
-            return {"response": f"Webhook '{hook.name}' {action}d", "exit_code": 0}
-
-        else:
-            return {"error": f"Unknown action: {action}", "exit_code": 1}
-    except Exception as e:
-        logger.error(f"manage_webhooks error: {e}")
-        return {"error": str(e), "exit_code": 1}
-    finally:
-        db.close()
-
-
-# ---------------------------------------------------------------------------
-# API token management tool
-# ---------------------------------------------------------------------------
-
-async def do_manage_tokens(content: str, owner: Optional[str] = None) -> Dict:
-    """Manage API tokens: list, create, delete."""
-    from core.database import SessionLocal, ApiToken
-    try:
-        args = _parse_tool_args(content)
-    except ValueError:
-        return {"error": "Invalid JSON arguments", "exit_code": 1}
-
-    action = args.get("action", "list")
-    db = SessionLocal()
-    try:
-        if action == "list":
-            tokens = db.query(ApiToken).all()
-            items = [{"id": t.id, "name": t.name, "token_prefix": t.token_prefix + "...",
-                       "is_active": t.is_active} for t in tokens]
-            return {"response": f"{len(items)} API tokens", "tokens": items, "exit_code": 0}
-
-        elif action == "create":
-            import uuid as _uuid, secrets, bcrypt
-            from datetime import datetime
-            name = args.get("name", "API Token")
-            raw_token = secrets.token_urlsafe(32)
-            token_hash = bcrypt.hashpw(raw_token.encode(), bcrypt.gensalt()).decode()
-            tid = str(_uuid.uuid4())[:8]
-            t = ApiToken(id=tid, name=name, token_hash=token_hash,
-                         token_prefix=raw_token[:8], is_active=True,
-                         created_at=datetime.utcnow(), updated_at=datetime.utcnow())
-            db.add(t)
-            db.commit()
-            return {"response": f"Created token '{name}'", "token": raw_token, "exit_code": 0}
-
-        elif action == "delete":
-            tid = args.get("token_id", "")
-            t = db.query(ApiToken).filter(ApiToken.id == tid).first()
-            if not t:
-                return {"error": f"Token {tid} not found", "exit_code": 1}
-            name = t.name
-            db.delete(t)
-            db.commit()
-            return {"response": f"Deleted token '{name}'", "exit_code": 0}
-
-        else:
-            return {"error": f"Unknown action: {action}", "exit_code": 1}
-    except Exception as e:
-        logger.error(f"manage_tokens error: {e}")
-        return {"error": str(e), "exit_code": 1}
-    finally:
-        db.close()
-
-# ---------------------------------------------------------------------------
-# Settings/preferences management tool
-# ---------------------------------------------------------------------------
-
-async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
-    """Manage user settings and preferences."""
-    try:
-        args = _parse_tool_args(content)
-    except ValueError:
-        return {"error": "Invalid JSON arguments", "exit_code": 1}
-
-    action = args.get("action", "list")
-
-    from core.database import SessionLocal
-    db = SessionLocal()
-    try:
-        # set/get/list/delete operate on the REAL app settings (the same store
-        # the Settings panel writes), so changing a model / voice / search
-        # engine / reminder channel from chat actually takes effect.
-        from src.settings import load_settings, save_settings, DEFAULT_SETTINGS
-
-        # Secrets/credentials the agent must NOT write — kept read-only (masked)
-        # so API keys never flow through chat. User sets these in the panel.
-        _SECRET_KEYS = {
-            "brave_api_key", "google_pse_key", "google_pse_cx",
-            "tavily_api_key", "serper_api_key", "app_public_url",
-        }
-        def _is_secret(k):
-            # `token` must be a suffix, not a substring: otherwise the int
-            # setting `agent_input_token_budget` (which even has a "token budget"
-            # alias to set it from chat) is wrongly classified as a credential.
-            return (
-                k in _SECRET_KEYS
-                or k.endswith("token")
-                or any(t in k for t in ("api_key", "_key", "secret", "password"))
-            )
-
-        # Friendly aliases → real keys, so natural phrasing resolves.
-        _ALIASES_SET = {
-            "voice": "tts_voice", "tts voice": "tts_voice", "tts": "tts_enabled",
-            "text to speech": "tts_enabled", "tts provider": "tts_provider",
-            "speech speed": "tts_speed", "voice speed": "tts_speed",
-            "stt": "stt_enabled", "speech to text": "stt_enabled", "transcription": "stt_enabled",
-            "search engine": "search_provider", "search provider": "search_provider",
-            "search results": "search_result_count", "result count": "search_result_count",
-            "default model": "default_model", "chat model": "default_model",
-            "default endpoint": "default_endpoint_id",
-            "task model": "task_model", "background model": "task_model",
-            "teacher model": "teacher_model", "teacher": "teacher_enabled",
-            "utility model": "utility_model", "research model": "research_model",
-            "research max tokens": "research_max_tokens",
-            "vision model": "vision_model", "vision": "vision_enabled",
-            "image model": "image_model", "image quality": "image_quality",
-            "image gen": "image_gen_enabled", "image generation": "image_gen_enabled",
-            "reminder channel": "reminder_channel", "reminders": "reminder_channel",
-            "ntfy topic": "reminder_ntfy_topic",
-            "webhook integration": "reminder_webhook_integration_id",
-            "webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template",
-            "agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls",
-            "agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds",
-            "token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget",
-            "hard max": "agent_input_token_hard_max",
-            "token budget cap": "agent_input_token_hard_max",
-            "input budget cap": "agent_input_token_hard_max",
-        }
-        def _resolve(k):
-            k2 = (k or "").strip().lower()
-            if k2 in DEFAULT_SETTINGS:
-                return k2
-            return _ALIASES_SET.get(k2, (k or "").strip())
-
-        _ENUMS = {
-            "image_quality": ["low", "medium", "high"],
-            "reminder_channel": ["browser", "email", "ntfy", "webhook"],
-        }
-        def _coerce(value, default):
-            if isinstance(default, bool):
-                return value if isinstance(value, bool) else str(value).strip().lower() in ("true", "on", "yes", "1", "enable", "enabled")
-            if isinstance(default, int):
-                return int(value)
-            return value
-
-        def _model_slug(value: str) -> str:
-            import re as _re
-            return _re.sub(r"[^a-z0-9]+", "", (value or "").lower())
-
-        def _endpoint_model_from_cache(model_query: str):
-            """Resolve friendly model text to an enabled endpoint + real model id.
-
-            The Settings UI stores both `<prefix>_endpoint_id` and
-            `<prefix>_model`; writing only the model leaves the runtime on the
-            old endpoint. Prefer cached model lists so this stays fast/offline.
-            """
-            import json as _json
-            import re as _re
-            from core.database import ModelEndpoint
-
-            wanted = (model_query or "").strip()
-            wanted_slug = _model_slug(wanted)
-            wanted_tokens = [_model_slug(t) for t in _re.findall(r"[A-Za-z0-9]+", wanted)]
-            wanted_tokens = [t for t in wanted_tokens if t]
-            if not wanted_slug:
-                return None
-            best = None
-            for ep in db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all():
-                raw_models = []
-                try:
-                    raw_models = _json.loads(ep.cached_models or "[]") or []
-                except Exception:
-                    raw_models = []
-                # If cache is empty, still allow matching against endpoint name
-                # for callers using model@endpoint elsewhere later.
-                for mid in raw_models:
-                    mid = str(mid)
-                    mid_slug = _model_slug(mid)
-                    if not mid_slug:
-                        continue
-                    exact = mid.lower() == wanted.lower()
-                    compact_match = wanted_slug in mid_slug or mid_slug in wanted_slug
-                    token_match = bool(wanted_tokens) and all(tok in mid_slug for tok in wanted_tokens)
-                    if exact or compact_match or token_match:
-                        score = 3 if exact else (2 if compact_match else 1)
-                        if not best or score > best[0]:
-                            best = (score, ep.id, mid)
-            if best:
-                return {"endpoint_id": best[1], "model": best[2]}
-            return None
-
-        def _mask(k, v):
-            return "••••• (set in panel)" if _is_secret(k) and v else v
-
-        if action == "list":
-            s = load_settings()
-            shown = {k: _mask(k, v) for k, v in s.items() if k in DEFAULT_SETTINGS and not isinstance(v, dict)}
-            return {"response": f"{len(shown)} settings (use get/set with a key)", "settings": shown, "exit_code": 0}
-
-        elif action == "get":
-            key = _resolve(args.get("key", ""))
-            if not key:
-                return {"error": "key is required", "exit_code": 1}
-            if key not in DEFAULT_SETTINGS:
-                return {"error": f"Unknown setting '{args.get('key')}'. Use action='list' to see them.", "exit_code": 1}
-            val = load_settings().get(key, DEFAULT_SETTINGS.get(key))
-            return {"response": f"{key} = {_mask(key, val)}", "value": _mask(key, val), "exit_code": 0}
-
-        elif action == "set":
-            raw = args.get("key", "")
-            value = args.get("value")
-            if not raw:
-                return {"error": "key is required", "exit_code": 1}
-            key = _resolve(raw)
-            if key not in DEFAULT_SETTINGS:
-                return {"error": f"Unknown setting '{raw}'. Use action='list' to see available settings.", "exit_code": 1}
-            if _is_secret(key):
-                return {"response": f"'{key}' is a credential/secret — for security I can't set it from chat. Open Settings and set it there.", "exit_code": 0}
-            # Structured settings (dicts/lists like keybinds, default_model_fallbacks)
-            # have no safe scalar coercion — _coerce would pass a bare string
-            # straight through and clobber the structure. Refuse them here; they're
-            # edited in their dedicated panels. (reset/delete still restore the
-            # default structure, which is safe.)
-            if isinstance(DEFAULT_SETTINGS[key], (dict, list)):
-                return {"response": f"'{key}' is a structured setting — edit it in its panel, not from chat. (You can reset it to default here.)", "exit_code": 0}
-            try:
-                value = _coerce(value, DEFAULT_SETTINGS[key])
-            except (ValueError, TypeError):
-                return {"error": f"'{value}' isn't a valid value for {key} (expected {type(DEFAULT_SETTINGS[key]).__name__}).", "exit_code": 1}
-            if key in _ENUMS and str(value).lower() not in _ENUMS[key]:
-                return {"error": f"{key} must be one of: {', '.join(_ENUMS[key])}.", "exit_code": 1}
-            s = load_settings()
-            s[key] = value
-            if key in {"default_model", "research_model", "utility_model", "task_model", "vision_model", "image_model"}:
-                resolved = _endpoint_model_from_cache(str(value))
-                if resolved:
-                    prefix = key[:-6]
-                    s[f"{prefix}_endpoint_id"] = resolved["endpoint_id"]
-                    s[key] = resolved["model"]
-                    value = resolved["model"]
-            save_settings(s)
-            if key.endswith("_model") and s.get(f"{key[:-6]}_endpoint_id"):
-                return {"response": f"Set {key} = {value} (endpoint {s.get(f'{key[:-6]}_endpoint_id')}).", "exit_code": 0}
-            return {"response": f"Set {key} = {value}.", "exit_code": 0}
-
-        elif action == "delete" or action == "reset":
-            key = _resolve(args.get("key", ""))
-            if key not in DEFAULT_SETTINGS:
-                return {"error": f"Unknown setting '{args.get('key')}'.", "exit_code": 1}
-            if _is_secret(key):
-                return {"response": f"'{key}' is a credential — reset it in the panel.", "exit_code": 0}
-            s = load_settings()
-            s[key] = DEFAULT_SETTINGS[key]
-            save_settings(s)
-            return {"response": f"Reset {key} to default ({DEFAULT_SETTINGS[key]}).", "exit_code": 0}
-
-        elif action in ("disable_tool", "enable_tool", "list_tools"):
-            # Tool-toggle actions. These edit settings.json:disabled_tools
-            # (the global list read on every chat request) rather than
-            # prefs.json. Friendly aliases accepted: "shell" -> "bash",
-            # "search" -> "web_search", "browser" -> "builtin_browser",
-            # "documents" -> the document tool set, "memory" ->
-            # manage_memory, etc.
-            from src.settings import get_setting, save_settings, load_settings
-            _ALIASES = {
-                "shell": ["bash"],
-                "terminal": ["bash"],
-                "search": ["web_search"],
-                "web": ["web_search"],
-                "browser": ["builtin_browser"],
-                "documents": ["create_document", "edit_document", "update_document", "suggest_document"],
-                "doc": ["create_document", "edit_document", "update_document", "suggest_document"],
-                "memory": ["manage_memory"],
-                "skills": ["manage_skills"],
-                "images": ["generate_image"],
-                "image": ["generate_image"],
-                "tasks": ["manage_tasks"],
-                "notes": ["manage_notes"],
-                "calendar": ["manage_calendar"],
-                "email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
-                "research": ["web_search"],  # research is a per-request flag, not a tool — closest analog
-            }
-
-            if action == "list_tools":
-                current = get_setting("disabled_tools", []) or []
-                return {
-                    "response": (
-                        f"Currently disabled: {', '.join(current) if current else '(none)'}.\n"
-                        "Common toggles: shell (bash), search (web_search), browser, documents, "
-                        "memory, skills, images, tasks, notes, calendar, email."
-                    ),
-                    "disabled": list(current),
-                    "exit_code": 0,
-                }
-
-            tool_name = (args.get("tool") or args.get("name") or "").strip().lower()
-            if not tool_name:
-                return {"error": "tool name required (e.g. 'shell', 'search', 'bash')", "exit_code": 1}
-            targets = _ALIASES.get(tool_name, [tool_name])
-
-            settings = load_settings()
-            current = list(settings.get("disabled_tools") or [])
-            before = set(current)
-            if action == "disable_tool":
-                for t in targets:
-                    if t not in current:
-                        current.append(t)
-            else:  # enable_tool
-                current = [t for t in current if t not in targets]
-            after = set(current)
-            settings["disabled_tools"] = current
-            save_settings(settings)
-
-            verb = "Disabled" if action == "disable_tool" else "Enabled"
-            changed = sorted(after.symmetric_difference(before))
-            return {
-                "response": (
-                    f"{verb} {tool_name} ({', '.join(targets)}). "
-                    f"Now disabled: {', '.join(current) if current else '(none)'}."
-                ),
-                "changed": changed,
-                "disabled": list(current),
-                "exit_code": 0,
-            }
-
-        else:
-            return {"error": f"Unknown action: {action}", "exit_code": 1}
-    except Exception as e:
-        logger.error(f"manage_settings error: {e}")
-        return {"error": str(e), "exit_code": 1}
-    finally:
-        db.close()
-
-
-# ---------------------------------------------------------------------------
-# API call tool
-# ---------------------------------------------------------------------------
-
 async def do_api_call(content: str) -> Dict:
    """Execute an API call to a registered integration."""
    from src.integrations import execute_api_call, load_integrations
@@ -1545,10 +945,10 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
        text = str(raw).strip().lower()
        if text in {"none", "no", "off", "false"}:
            return None
-        m = re.search(r"(\d+)\s*(?:m|min|minute|minutes)\b", text)
+        m = re.search(r"(\d+)\s*(?:minutes?|mins?|m)\b", text)
        if m:
            return max(0, int(m.group(1)))
-        m = re.search(r"(\d+)\s*(?:h|hr|hour|hours)\b", text)
+        m = re.search(r"(\d+)\s*(?:hours?|hrs?|h)\b", text)
        if m:
            return max(0, int(m.group(1)) * 60)
        if text.isdigit():
@@ -1561,7 +961,7 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
            return desc
        reminder_only = re.compile(
            r"^\s*(?:remind(?:er)?|alarm)\s*:?\s*\d+\s*"
-            r"(?:m|min|minute|minutes|h|hr|hour|hours)\b.*$",
+            r"(?:minutes?|mins?|m|hours?|hrs?|h)\b.*$",
            re.I,
        )
        return "" if reminder_only.match(desc) else desc
@@ -2680,13 +2080,25 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
                endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
            )
            note = "" if registered else " (state-write failed — task may not show in UI)"
+            where = host or "local"
+            log_path = f"/tmp/odysseus-tmux/{sid}.log"
            return {
-                "output": f"Serving {repo_id} (session: {sid}){note}",
+                "output": (
+                    f"Serving {repo_id} on {where} (session: {sid}){note}\n"
+                    f"Next required check: call list_served_models. If this task is not ready, "
+                    f"call tail_serve_output with session_id={sid} and tail=400 before answering. "
+                    f"Do not tell the user to check logs; you have the log tool."
+                ),
                "session_id": sid,
                "task_type": "serve",
                "phase": "running",
                "host": host,
                "endpoint_id": endpoint_id,
+                "log_path": log_path,
+                "next_tools": [
+                    {"name": "list_served_models", "arguments": {}},
+                    {"name": "tail_serve_output", "arguments": {"session_id": sid, "tail": 400}},
+                ],
                "exit_code": 0,
            }
        # FastAPI HTTPException puts the message under `detail`, not `error`.
@@ -2854,6 +2266,10 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "",
            break

    if remote:
+        try:
+            remote, sport = _validate_cookbook_ssh_target(remote, sport)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
        _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
        cmd = (
            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
@@ -2942,8 +2358,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        tail = 400
    tail = max(20, min(tail, 4000))
    headers = _internal_headers()
-    remote = (args.get("remote_host") or args.get("host") or "").strip()
-    sport = (args.get("ssh_port") or "").strip()
+    remote = _string_arg(args.get("remote_host") or args.get("host"))
+    sport = _string_arg(args.get("ssh_port"))
    # Resolve host from cookbook state if caller didn't pass one — same
    # lookup _cookbook_kill_session uses.
    if not remote:
@@ -2961,6 +2377,12 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
                    if not sport:
                        sport = t.get("sshPort") or ""
                    break
+    if remote:
+        try:
+            remote, sport = _validate_cookbook_ssh_target(remote, sport)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
+
    # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
    # live tmux pane. The pane is what the user would see scrolling on
    # their screen — including the post-crash neofetch banner and the
@@ -3023,8 +2445,17 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        MAX_CHARS = 8000
        if len(output_text) > MAX_CHARS:
            output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
+        if not output_text:
+            output_text = (
+                f"No log output captured yet for {session_id} on {host_label}. "
+                "This usually means the tmux wrapper has started but the model process "
+                "has not printed anything yet. Do not stop here: call list_served_models "
+                "again to check whether it is still loading, ready, or crashed; if it is "
+                "still not ready, call tail_serve_output again with a larger tail after "
+                "the next status check."
+            )
        return {
-            "output": output_text or "(empty pane)",
+            "output": output_text,
            "session_id": session_id,
            "host": host_label,
            "tail_lines": tail,
@@ -3138,7 +2569,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

-    host = (args.get("host") or args.get("remote_host") or "").strip()
+    host = _string_arg(args.get("host") or args.get("remote_host"))
    sess = (args.get("tmux_session") or args.get("session_id") or "").strip()
    model = (args.get("model") or args.get("repo_id") or "").strip()
    port = args.get("port") or 8000
@@ -3149,6 +2580,12 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
        return {"error": "tmux_session and model are required", "exit_code": 1}

    # Verify tmux session exists on the target host
+    if host:
+        try:
+            host, _ = _validate_cookbook_ssh_target(host)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
+
    headers = _internal_headers()
    if host:
        check = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)} 2>&1'"
@@ -3232,7 +2669,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
        host_only = host.split("@", 1)[-1] if host else "localhost"
        endpoint_url = f"http://{host_only}:{int(port)}/v1"
        try:
-            from src.tool_implementations import do_manage_endpoints  # avoid forward ref issues
+            from src.agent_tools.admin_tools import do_manage_endpoints  # moved in #3629
        except Exception:
            do_manage_endpoints = None
        if do_manage_endpoints is not None:
@@ -3763,7 +3200,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
    if not name:
        return {"error": "name is required", "exit_code": 1}

-    contacts = {}  # email -> {name, source}
+    contacts = {}  # email_or_phone -> {name, source, phone?}

    # 1. CardDAV (Radicale) — structured contacts. Call in-process: a
    # server-side httpx GET to /api/contacts/search carries no session
@@ -3778,10 +3215,18 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
            match = q in hay_name or any(q in (e or "").lower() for e in c.get("emails", []))
            if not match:
                continue
+            has_email = False
            for email in (c.get("emails") or []):
                email = (email or "").strip().lower()
                if email and "@" in email:
                    contacts[email] = {"name": c.get("name") or email, "source": "contacts"}
+                    has_email = True
+            # Fall back to phone numbers when the contact has no email address
+            if not has_email:
+                for phone in (c.get("phones") or []):
+                    phone = (phone or "").strip()
+                    if phone:
+                        contacts[phone] = {"name": c.get("name") or phone, "source": "contacts", "phone": phone}
    except Exception:
        pass

@@ -3801,8 +3246,11 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
        return {"output": f"No contacts found matching '{name}'.", "exit_code": 0}

    lines = [f"Contacts matching '{name}':"]
-    for email, info in contacts.items():
-        lines.append(f"- {info['name']} <{email}> ({info['source']})")
+    for key, info in contacts.items():
+        if info.get("phone"):
+            lines.append(f"- {info['name']} — phone: {info['phone']} ({info['source']})")
+        else:
+            lines.append(f"- {info['name']} <{key}> ({info['source']})")
    return {"output": "\n".join(lines), "exit_code": 0}


@@ -88,23 +88,24 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "pipeline": "Run a multi-step AI pipeline with multiple models. Chain tasks together in sequence.",
    "list_models": "List all available AI models and their endpoints.",
    "manage_session": "Chat management: rename, archive, delete, or fork chats (the UI calls these 'chats'; internally 'sessions'). Use for 'rename my chats', 'rename this chat', 'archive/delete a chat'.",
-    "manage_memory": "Memory management: list, add, edit, delete, or search persistent memories.",
+    "manage_memory": "Memory management: list, add, edit, delete, or search persistent memories. For facts about the USER (their name, preferences, where they live). NOT for info about ANOTHER person — addresses, phones, emails belonging to a contact go in manage_contact, not memory.",
    "manage_skills": "Skill management: add, update, publish, or search reusable skills/presets.",
    "manage_tasks": "Scheduled task management: list, create, edit, delete, pause, resume, or run cron tasks.",
    "manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
    "manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
    "manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
+    "api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
    "manage_tokens": "API token management: list, create, or delete API access tokens.",
-    "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content. action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
+    "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
    "manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
    "manage_settings": "Change ANY real app setting (the ones the Settings panel writes) so the user never has to open it: TTS voice/provider/speed, STT, search engine + result count, default/teacher/task/utility/vision/image/research models, image quality, reminder channel (browser/email/ntfy), agent timeout/tool-call budget, and more. action=set with key (friendly aliases ok: voice, 'search engine', 'default model', 'teacher model', 'image quality', 'reminder channel'...) + value; get/list/reset too. Also toggles tools on/off (disable_tool/enable_tool/list_tools). Secrets/API keys are read-only. Use for any 'change my…/set my…/use X for…/turn on…' preference request.",
    "create_session": "Create a new chat with a name and model.",
    "list_sessions": "List all chats with their metadata (the UI calls these 'chats'). Use for 'list my chats', 'rename all my chats' (list first, then manage_session to rename each).",
    "send_to_session": "Send a message to another chat. Cross-chat communication.",
    "search_chats": "Search past session transcripts across chats.",
-    "ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
+    "ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Omit `multi`/keep it false unless the question explicitly permits choosing multiple options. Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
    "update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. No effect when there is no active plan.",
-    "ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
+    "ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. To pre-fill the reply body in one shot (USE THIS whenever the user told you what to say — opening an empty draft when they asked you to write is wrong), append the body after the mode: `open_email_reply <uid> <folder> reply <body text>`. Body can continue on subsequent lines for multi-line replies. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
    "list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.",
    "list_emails": "List emails for a folder/account, newest first, including read messages by default. Shows subject, sender, date, UID, account, and AI summary. Check inbox, find emails needing replies. Supports account from list_email_accounts for Gmail/work/custom mailboxes. For last/latest/newest email, use max_results=1 and unread_only=false.",
    "read_email": "Read the full content of a specific email by UID or Message-ID. View email body, check details. Supports account from list_email_accounts when the UID belongs to a non-default mailbox.",
@@ -115,7 +116,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "mark_email_read": "Mark an email as read or unread by toggling the \\Seen flag.",
    "bulk_email": "Perform one action on many emails at once. Use for delete all those, archive these, mark all read, move spam to junk. Takes explicit UIDs from list_emails or all_unread=true. Always pass account for Gmail/work/custom mailbox results.",
    "resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.",
-    "manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. Do not use for user identity facts like 'my name is <name>'; those are memory.",
+    "manage_contact": "Save / update / delete / list address-book contacts (CardDAV). Use for info about ANOTHER person — name, email, phone, postal address. Args: action=list|add|update|delete, name, email, phones, address, uid (from list). For 'save this for <person>' / address pastes / phone numbers next to a name, this is the right tool — NOT manage_memory. Do NOT use for facts about the USER ('my name is X'); those are manage_memory.",
    "manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.",
    "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
    "download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.",
@@ -134,6 +135,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter=<keyword> lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.",
    "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.",
    "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.",
+    "manage_bg_jobs": "Inspect and control detached background `bash` jobs (the ones started with a `#!bg` marker). action='list' shows this chat's jobs (id/status/age/command); action='output' returns a job's captured output so far (check on a long-running job, or re-read a finished one); action='kill' stops a runaway job by id. Use for 'is the background job done', 'check on that job', 'show the build output', 'kill the background job', 'stop the bg task'. output/kill need a job_id from list.",
 }


@@ -348,6 +350,12 @@ class ToolIndex:
            {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"},
        frozenset({"calendar", "event", "meeting", "schedule", "appointment"}):
            {"manage_calendar"},
+        # Detached background `bash` jobs (#!bg): check on / read output / kill.
+        frozenset({"background job", "background jobs", "bg job", "bg jobs",
+                   "background task", "is the job done", "check the job",
+                   "check on that job", "job output", "kill the job",
+                   "kill the background", "stop the background", "running job"}):
+            {"manage_bg_jobs"},
        frozenset({"note", "todo", "reminder", "remind", "checklist", "remember to"}):
            {"manage_notes"},
        # Chat/session management. "rename" alone maps to documents below, so a
@@ -372,7 +380,19 @@ class ToolIndex:
            {"resolve_contact", "manage_contact"},
        frozenset({"save contact", "add contact", "new contact", "update contact",
                   "edit contact", "delete contact", "remove contact",
-                   "save this person", "add to contacts", "save to contacts"}):
+                   "save this person", "add to contacts", "save to contacts",
+                   # "add <name> to (my) contacts" — words between 'add' and
+                   # 'contacts' break the literal phrase match above, so anchor
+                   # on the tail.
+                   "to my contacts", "to contacts", "to address book",
+                   # "save this for <person>" / "save it for <person>" — the user
+                   # is storing info on a known person without using the literal
+                   # word 'contact'. Catches the address/phone-paste pattern.
+                   "save this for", "save it for", "save for",
+                   "save this one for", "save that for",
+                   # Postal-address-like signals
+                   "postal code", "zip code", "street address",
+                   "mailing address", "their address"}):
            {"manage_contact"},
        # "Ask another model" intent → chat_with_model relays to a
        # different model and returns its answer. ask_teacher escalates
@@ -402,6 +422,14 @@ class ToolIndex:
                   "my settings", "change setting", "change a setting", "set setting",
                   "preference", "preferences", "configure"}):
            {"manage_settings", "ui_control"},
+        # API-integration intent → the api_call tool. Mirrors the agent-loop
+        # "integrations" domain so api_call still surfaces on the retrieval and
+        # keyword-fallback paths (not just the deterministic domain seed) when a
+        # user names a connected service.
+        frozenset({"api_call", "api call", "integration", "integrations",
+                   "home assistant", "homeassistant", "miniflux", "gitea",
+                   "linkding", "jellyfin"}):
+            {"api_call"},
        # Managing EXISTING research in the Library — open/read/find/delete.
        frozenset({"my research", "the research", "research on", "open research",
                   "read research", "find research", "delete research",
@@ -507,6 +535,53 @@ class ToolIndex:
        # prompts do not drag web schemas into the agent context.
        if self._WEB_RE.search(query):
            base.update({"web_search", "web_fetch"})
+        # Hard steering: when the query is a clear "save info about a specific
+        # person" pattern (address paste + name, phone next to a name, etc.),
+        # the model has been observed defaulting to manage_memory even with
+        # manage_contact in the toolset. Pull memory out for these queries so
+        # the model literally cannot pick it. ALWAYS_AVAILABLE includes
+        # manage_memory by default; we override that here.
+        # The "for/to <word>" check needs to allow lowercase names (users
+        # don't always capitalize) but filter out timing/pronoun stopwords
+        # so "save this for later" / "save for tomorrow" don't trigger.
+        _CONTACT_STOPWORDS_AFTER_FOR = {
+            "later", "tomorrow", "yesterday", "now", "then", "today",
+            "tonight", "me", "us", "you", "him", "her", "them", "myself",
+            "yourself", "next", "this", "that", "the", "a", "an", "future",
+            "real", "use", "uses", "another", "future", "reference",
+        }
+        # Regex catches "save (this|it|the|her|...|<noun>) for <name>" / "to my
+        # contacts" patterns. More forgiving than literal-keyword matching —
+        # 'save this address for Alex' uses one extra word between 'save' and
+        # 'for' that breaks the contiguous 'save this for' phrase.
+        save_for_match = re.search(
+            r"\bsave\b(?:\s+\w+){0,3}\s+(?:for|to)\s+([A-Za-z]+)",
+            ql,
+        )
+        # "to my contacts", "into my contacts", "in my address book", etc.
+        to_contacts = re.search(r"\b(?:to|in|into)\s+(?:my\s+)?(?:contacts|address\s+book)\b", ql)
+        # Possessive: "save (his|her|their) (address|phone|email|number) ..."
+        # — strong contact signal even without "for <name>". Force-include
+        # manage_contact here too since the keyword fallback misses this
+        # construction.
+        possessive_contact = re.search(
+            r"\bsave\b(?:\s+\w+){0,2}\s+(?:his|her|their)\s+(?:address|phone|number|email|contact|details)",
+            ql,
+        )
+        word_after = (
+            save_for_match.group(1).lower() if save_for_match else None
+        )
+        contact_only_signal = (
+            (save_for_match is not None
+             and word_after is not None
+             and word_after not in _CONTACT_STOPWORDS_AFTER_FOR)
+            or to_contacts is not None
+            or possessive_contact is not None
+        )
+        if possessive_contact is not None:
+            base.add("manage_contact")
+        if contact_only_signal and "manage_contact" in base:
+            base.discard("manage_memory")
        return base


@@ -39,6 +39,10 @@ _XML_TOOL_CALL_RE = re.compile(
    r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
    re.IGNORECASE,
 )
+# Unterminated variant of the tool-call wrapper pattern: matches an opening
+# <tool_call> / <function_call> tag (optionally namespace-prefixed) and
+# captures everything from there to the end of the input (\Z). No closing tag
+# is required.
+_XML_OPEN_TOOL_CALL_RE = re.compile(
+    r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
+    re.IGNORECASE,
+)
 _XML_INVOKE_RE = re.compile(
    r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
    re.IGNORECASE,
@@ -47,6 +51,21 @@ _XML_PARAM_RE = re.compile(
    r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
    re.IGNORECASE,
 )
+# Direct tool tag: <name>body</name>. The closing tag must repeat the opening
+# tag's name (backreference \1; compared case-insensitively because of
+# re.IGNORECASE). Group 1 is the tag name, group 2 is the non-greedy body.
+_XML_DIRECT_TOOL_RE = re.compile(
+    r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
+    re.IGNORECASE,
+)
+
+# Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
+#   <｜tool▁calls▁begin｜> ... <｜tool▁calls▁end｜>
+#   <｜tool▁call▁begin｜>tool_name<｜tool▁sep｜>{...}<｜tool▁call▁end｜>
+# These can leak as text through llama.cpp/Ollama-style endpoints when the
+# engine does not return structured OpenAI tool_calls.
+_STEPFUN_CALL_BEGIN = "<｜tool▁call▁begin｜>"
+_STEPFUN_CALL_SEP = "<｜tool▁sep｜>"
+_STEPFUN_CALL_END = "<｜tool▁call▁end｜>"
+_STEPFUN_CALLS_BEGIN = "<｜tool▁calls▁begin｜>"
+_STEPFUN_CALLS_END = "<｜tool▁calls▁end｜>"

 # Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
 # {tool => 'tool_name', args => '<param>value</param>'}
@@ -175,6 +194,9 @@ _TOOL_NAME_MAP = {
    "notes": "manage_notes",
    "todo": "manage_notes",
    "todos": "manage_notes",
+    "manage_bg_jobs": "manage_bg_jobs",
+    "bg_jobs": "manage_bg_jobs",
+    "background_jobs": "manage_bg_jobs",
 }

 _MISFENCED_WEB_TOOL_NAMES = {
@@ -286,6 +308,88 @@ def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]:
    return ToolBlock("web_fetch", url)


+
+def _parse_misfenced_read_file_lookup(content: str, *, allow_shell_style: bool = False) -> Optional[ToolBlock]:
+    """Recover simple read_file calls wrapped in python/bash fences.
+
+    Two shapes are recognised:
+
+    * A single Python call expression such as ``read_file("a.txt", offset=10)``.
+      At most one positional argument (the path) is accepted; keywords are
+      limited to ``path`` / ``file`` / ``file_path`` (all normalised to
+      ``path``) and ``offset`` / ``limit`` (non-negative integers).
+    * Only when ``allow_shell_style`` is true: a single non-blank line of the
+      form ``read_file <path>`` or ``read_file {json-args}``.
+
+    Args:
+        content: Raw text found inside the fence.
+        allow_shell_style: Enable the ``read_file <path>`` one-liner form.
+
+    Returns:
+        A ToolBlock for ``read_file``, or None when the content is not an
+        unambiguous read_file call.
+    """
+    stripped = content.strip()
+    if not stripped:
+        return None
+
+    try:
+        module = ast.parse(stripped, mode="exec")
+    except (SyntaxError, ValueError):
+        # ValueError covers source containing NUL bytes on interpreters that
+        # do not report it as a SyntaxError; either way it is "not Python".
+        module = None
+    if module and len(module.body) == 1 and isinstance(module.body[0], ast.Expr):
+        call = module.body[0].value
+        if isinstance(call, ast.Call) and isinstance(call.func, ast.Name):
+            if call.func.id.lower() != "read_file" or len(call.args) > 1:
+                return None
+            args = {}
+            if call.args:
+                # _literal_string is defined elsewhere in this file; a falsy
+                # result is treated as "not a usable path".
+                path = _literal_string(call.args[0])
+                if not path:
+                    return None
+                args["path"] = path
+            allowed = {"path", "file", "file_path", "offset", "limit"}
+            for keyword in call.keywords:
+                # keyword.arg is None for **kwargs, which is rejected here too.
+                if keyword.arg not in allowed:
+                    return None
+                key = "path" if keyword.arg in ("file", "file_path") else keyword.arg
+                if key == "path":
+                    path = _literal_string(keyword.value)
+                    if not path:
+                        return None
+                    args["path"] = path
+                    continue
+                try:
+                    value = ast.literal_eval(keyword.value)
+                except (ValueError, SyntaxError, TypeError):
+                    return None
+                # bool is a subclass of int; offset=True must not be accepted
+                # as a numeric offset/limit.
+                if isinstance(value, bool) or not isinstance(value, int) or value < 0:
+                    return None
+                args[key] = value
+            if not args.get("path"):
+                return None
+            from src.tool_schemas import function_call_to_tool_block
+            return function_call_to_tool_block("read_file", json.dumps(args))
+
+    if not allow_shell_style:
+        return None
+    lines = [line.strip() for line in stripped.splitlines() if line.strip()]
+    if len(lines) != 1:
+        return None
+    match = re.fullmatch(r"read_file\s+(.+)", lines[0], re.IGNORECASE)
+    if not match:
+        return None
+    path = match.group(1).strip()
+    if not path:
+        return None
+    if path.startswith("{"):
+        # JSON-argument form: read_file {"path": "...", "offset": 0, ...}
+        try:
+            args = json.loads(path)
+        except json.JSONDecodeError:
+            return None
+        if not isinstance(args, dict):
+            return None
+        normalized = {}
+        raw_path = args.get("path") or args.get("file") or args.get("file_path")
+        if isinstance(raw_path, str) and raw_path.strip():
+            normalized["path"] = raw_path.strip()
+        for key in ("offset", "limit"):
+            value = args.get(key)
+            # Ignore JSON true/false: they are ints to isinstance() but are
+            # not meaningful offsets or limits.
+            if isinstance(value, int) and not isinstance(value, bool) and value >= 0:
+                normalized[key] = value
+        if not normalized.get("path"):
+            return None
+        from src.tool_schemas import function_call_to_tool_block
+        return function_call_to_tool_block("read_file", json.dumps(normalized))
+    # Plain path form: strip one layer of matching surrounding quotes.
+    if len(path) >= 2 and path[0] == path[-1] and path[0] in "'\"":
+        path = path[1:-1].strip()
+    if not path:
+        return None
+    return ToolBlock("read_file", path)
+
+
 def _coerce_raw_web_query(value) -> Optional[str]:
    if isinstance(value, str) and value.strip():
        return value.strip()
@@ -443,6 +547,138 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
    return function_call_to_tool_block(tool_name, json.dumps(params))


+def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
+    """Parse a direct XML tool tag found inside <tool_call>.
+
+    Some local models emit:
+      <tool_call><web_search>query</web_search></tool_call>
+    instead of the invoke/parameter shape:
+      <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>
+    Keep this as an adapter to the canonical function-call converter so aliases
+    and per-tool argument formatting stay in one place.
+
+    Args:
+        tool_match: Regex match whose group 1 is the tag name and group 2 is
+            the tag body.
+
+    Returns:
+        Whatever ``function_call_to_tool_block`` returns for the mapped tool,
+        or None when the tag is structural markup, names an unknown tool, or
+        has an empty body.
+    """
+    tool_name = tool_match.group(1).lower().replace("-", "_")
+    # Structural wrapper tags are handled by other parsers, never as tools.
+    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
+        return None
+    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
+    if not mapped:
+        return None
+    body = tool_match.group(2).strip()
+    if not body:
+        return None
+    try:
+        params = json.loads(body)
+    except json.JSONDecodeError:
+        params = None
+    if not isinstance(params, dict):
+        # The body is not a JSON object, so it is the tool's plain-text primary
+        # argument. This also covers bodies that merely happen to be valid JSON
+        # scalars or arrays (``2024``, ``true``, ``[1, 2]``); coercing those to
+        # an empty dict would silently drop the query/command/path.
+        primary_key = {
+            "web_search": "query",
+            "web_fetch": "url",
+            "bash": "command",
+            "python": "code",
+            "read_file": "path",
+            "write_file": "path",
+        }.get(mapped, "content")
+        params = {primary_key: body}
+    from src.tool_schemas import function_call_to_tool_block
+    return function_call_to_tool_block(mapped, json.dumps(params))
+
+
+def _iter_stepfun_tool_calls(text: str):
+    """Yield ``(name, body)`` pairs for StepFun native tool-call tokens.
+
+    A call spans ``_STEPFUN_CALL_BEGIN`` ... ``_STEPFUN_CALL_END``, with
+    ``_STEPFUN_CALL_SEP`` splitting the tool name from its body. Only literal
+    ``str.find``/``str.rfind`` scans are used, so there is no regex
+    backtracking.
+
+    The separator is searched for strictly inside the current begin/end span.
+    A span without a separator is skipped instead of borrowing the separator
+    of a later call, which would swallow that later (valid) call.
+
+    Args:
+        text: Raw model output to scan.
+
+    Yields:
+        Tuples of the stripped tool name (non-empty, at most 128 characters)
+        and the stripped body text.
+    """
+    begin_len = len(_STEPFUN_CALL_BEGIN)
+    sep_len = len(_STEPFUN_CALL_SEP)
+    end_len = len(_STEPFUN_CALL_END)
+    pos = 0
+    while True:
+        start = text.find(_STEPFUN_CALL_BEGIN, pos)
+        if start < 0:
+            return
+        end = text.find(_STEPFUN_CALL_END, start + begin_len)
+        if end < 0:
+            return
+        # If an earlier begin token was never closed, anchor on the last begin
+        # token before this end so the name does not include stray markup.
+        start = text.rfind(_STEPFUN_CALL_BEGIN, start, end)
+        name_start = start + begin_len
+        sep = text.find(_STEPFUN_CALL_SEP, name_start, end)
+        if sep >= 0:
+            raw_name = text[name_start:sep].strip()
+            body = text[sep + sep_len:end].strip()
+            if raw_name and len(raw_name) <= 128:
+                yield raw_name, body
+        pos = end + end_len
+
+
+def _strip_stepfun_tool_markup(text: str) -> str:
+    """Drop StepFun tool-call blocks and their outer wrapper tokens.
+
+    Every complete ``_STEPFUN_CALL_BEGIN`` ... ``_STEPFUN_CALL_END`` span is
+    removed with literal string scans. Scanning stops at the first begin token
+    that has no matching end token, and the text from there on is kept. The
+    ``_STEPFUN_CALLS_BEGIN`` / ``_STEPFUN_CALLS_END`` wrapper tokens are then
+    deleted wherever they remain.
+    """
+    begin_len = len(_STEPFUN_CALL_BEGIN)
+    end_len = len(_STEPFUN_CALL_END)
+    kept = []
+    cursor = 0
+    block_start = text.find(_STEPFUN_CALL_BEGIN, cursor)
+    while block_start >= 0:
+        block_end = text.find(_STEPFUN_CALL_END, block_start + begin_len)
+        if block_end < 0:
+            # Unterminated block: keep the remainder untouched.
+            break
+        kept.append(text[cursor:block_start])
+        cursor = block_end + end_len
+        block_start = text.find(_STEPFUN_CALL_BEGIN, cursor)
+    kept.append(text[cursor:])
+    without_calls = "".join(kept)
+    for wrapper in (_STEPFUN_CALLS_BEGIN, _STEPFUN_CALLS_END):
+        without_calls = without_calls.replace(wrapper, "")
+    return without_calls
+
+
+def _strip_bare_invoke_markup(text: str) -> str:
+    """Remove bare ``<invoke ...>...</invoke>`` blocks without regex backtracking.
+
+    Tag names are matched ASCII case-insensitively. A block is removed only
+    when an opening ``<invoke``, the ``>`` that ends that opening tag, and a
+    later ``</invoke>`` are all present; from the first incomplete block
+    onward the text is kept unchanged.
+
+    Args:
+        text: Text that may contain bare invoke markup.
+
+    Returns:
+        ``text`` with every complete invoke block removed.
+    """
+    # Fold ASCII letters only, and only once. ``str.lower()`` may change the
+    # string's length (e.g. "\u0130" lowers to two code points), so offsets
+    # found in a lowered copy would point at the wrong characters of ``text``.
+    # Re-lowering inside the loop also made the scan quadratic.
+    folded = text.translate(str.maketrans(
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz"))
+    close_tag = "</invoke>"
+    out = []
+    pos = 0
+    while True:
+        start = folded.find("<invoke", pos)
+        if start < 0:
+            break
+        tag_end = text.find(">", start)
+        if tag_end < 0:
+            break
+        close = folded.find(close_tag, tag_end + 1)
+        if close < 0:
+            break
+        out.append(text[pos:start])
+        pos = close + len(close_tag)
+    # Everything after the last removed block (or all of it) is kept verbatim.
+    out.append(text[pos:])
+    return "".join(out)
+
+
+def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
+    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.
+
+    Args:
+        tool_name: Tool name as emitted by the model; it is lowercased and
+            ``-`` / ``.`` are normalized to ``_`` before lookup.
+        body: Argument text of the call, either a JSON object or plain text.
+
+    Returns:
+        Whatever ``function_call_to_tool_block`` returns for the mapped tool,
+        or None when the tool is unknown or the body is empty.
+    """
+    tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
+    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
+    if not mapped:
+        return None
+    body = (body or "").strip()
+    if not body:
+        return None
+    try:
+        params = json.loads(body)
+    except json.JSONDecodeError:
+        params = None
+    if not isinstance(params, dict):
+        # The body is not a JSON object, so it is the tool's plain-text primary
+        # argument. This also covers bodies that merely happen to be valid JSON
+        # scalars or arrays (``2024``, ``true``, ``[1, 2]``); coercing those to
+        # an empty dict would silently drop the query/command/path.
+        primary_key = {
+            "web_search": "query",
+            "web_fetch": "url",
+            "bash": "command",
+            "python": "code",
+            "read_file": "path",
+            "write_file": "path",
+        }.get(mapped, "content")
+        params = {primary_key: body}
+    from src.tool_schemas import function_call_to_tool_block
+    return function_call_to_tool_block(mapped, json.dumps(params))
+
+
 def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
    """Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
    # Extract tool name
@@ -508,8 +744,9 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
    2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
    3. XML-style <tool_call>/<invoke> blocks
    4. <tool_code> blocks (MiniMax-M2.5 style)
-    5. DeepSeek DSML markup (normalized to <invoke> first)
-    6. Non-native local model fallback: prose mentioning web_search followed by
+    5. StepFun Step-3 native <｜tool▁call▁begin｜> tokens
+    6. DeepSeek DSML markup (normalized to <invoke> first)
+    7. Non-native local model fallback: prose mentioning web_search followed by
       bare JSON args, e.g. {"query":"...", "time_filter":"week"}

    `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -549,7 +786,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
                # _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code.
                continue
            if tag in ("python", "bash"):
-                block = _parse_misfenced_web_lookup(content)
+                block = (_parse_misfenced_web_lookup(content)
+                         or _parse_misfenced_read_file_lookup(content, allow_shell_style=(tag == "bash")))
                if block:
                    blocks.append(block)
                    continue
@@ -564,12 +802,38 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:

    # Pattern 3: XML-style <tool_call>/<invoke> blocks
    if not blocks:
+        for tool_name, body in _iter_stepfun_tool_calls(text):
+            block = _parse_stepfun_tool_call(tool_name, body)
+            if block:
+                blocks.append(block)
+        if blocks:
+            return blocks
        # Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
        for m in _XML_TOOL_CALL_RE.finditer(text):
            for inv in _XML_INVOKE_RE.finditer(m.group(1)):
                block = _parse_xml_invoke(inv)
                if block:
                    blocks.append(block)
+            if not blocks:
+                for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
+                    block = _parse_xml_direct_tool(direct)
+                    if block:
+                        blocks.append(block)
+        # Some local models stream an opening <tool_call> wrapper and a
+        # complete inner tool tag, but forget the closing </tool_call>.
+        if not blocks:
+            for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
+                body = m.group(1)
+                for inv in _XML_INVOKE_RE.finditer(body):
+                    block = _parse_xml_invoke(inv)
+                    if block:
+                        blocks.append(block)
+                if blocks:
+                    break
+                for direct in _XML_DIRECT_TOOL_RE.finditer(body):
+                    block = _parse_xml_direct_tool(direct)
+                    if block:
+                        blocks.append(block)
        # Try bare <invoke> without wrapper
        if not blocks:
            for inv in _XML_INVOKE_RE.finditer(text):
@@ -611,7 +875,9 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
    text = _normalize_dsml(text)
    cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
    cleaned = _TOOL_CALL_RE.sub('', cleaned)
+    cleaned = _strip_stepfun_tool_markup(cleaned)
    cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
+    cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
    cleaned = _TOOL_CODE_RE.sub('', cleaned)
    if not skip_fenced:
        raw_web_json = _parse_raw_web_json_lookup(cleaned)
@@ -619,6 +885,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
            _, (start, end) = raw_web_json
            cleaned = cleaned[:start] + cleaned[end:]
    # Strip bare <invoke> blocks not wrapped in <tool_call>
-    cleaned = re.sub(r'<invoke\s+name=["\'].*?</invoke>', '', cleaned, flags=re.DOTALL | re.IGNORECASE)
+    cleaned = _strip_bare_invoke_markup(cleaned)
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
    return cleaned.strip()
@@ -68,11 +68,12 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "web_fetch",
-            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research).",
+            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research). Downloads are size-budgeted; a '[partial content: ...]' notice in the result means the body was cut short and you can re-call with full=true for the rest.",
            "parameters": {
                "type": "object",
                "properties": {
-                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"}
+                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"},
+                    "full": {"type": "boolean", "description": "Raise the download budget to the hard cap for large pages/files. Use only after a result reported partial content."}
                },
                "required": ["url"]
            }
@@ -466,7 +467,7 @@ FUNCTION_TOOL_SCHEMAS = [
                    "question": {"type": "string", "description": "The question to ask. Be specific and self-contained."},
                    "options": {
                        "type": "array",
-                        "description": "2-6 mutually exclusive choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
+                        "description": "2-6 choices. Each is an object with a short `label` and an optional `description` explaining the trade-off.",
                        "items": {
                            "type": "object",
                            "properties": {
@@ -476,7 +477,7 @@ FUNCTION_TOOL_SCHEMAS = [
                            "required": ["label"]
                        }
                    },
-                    "multi": {"type": "boolean", "description": "Set true to let the user select multiple options instead of one. Default false."}
+                    "multi": {"type": "boolean", "description": "Set true ONLY when the question explicitly allows choosing more than one option. Otherwise omit it or set false. Default false."}
                },
                "required": ["question", "options"]
            }
@@ -1008,7 +1009,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "resolve_contact",
-            "description": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]' or 'email [name]' without an email address.",
+            "description": "Look up a contact by name. Searches CardDAV address book and sent email history. Returns email addresses (when available) or phone numbers. Use when the user says 'message [name]', 'email [name]', or asks for someone's contact details.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -1022,7 +1023,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "manage_contact",
-            "description": "Create, update, delete, or list the user's CardDAV contacts. Use to save a new contact ('save Jonathan's email jon@x.com'), update an existing one ('change Maria's number'), or remove one. For update/delete you need the contact's uid — call action='list' first to find it. Writes go through the same dedupe + validation as the Contacts UI.",
+            "description": "Create, update, delete, or list the user's CardDAV contacts. Use to save a new contact, update an existing one (email/phone/address), or remove one. For update/delete you need the contact's uid — call action='list' first to find it. Writes go through the same dedupe + validation as the Contacts UI.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -1033,6 +1034,7 @@ FUNCTION_TOOL_SCHEMAS = [
                    "email": {"type": "string", "description": "Single email address (convenience for add, or the primary email for update)."},
                    "emails": {"type": "array", "items": {"type": "string"}, "description": "Full list of email addresses (for update; first is primary)."},
                    "phones": {"type": "array", "items": {"type": "string"}, "description": "Full list of phone numbers (for update)."},
+                    "address": {"type": "string", "description": "Postal/mailing address as a single human-readable string."},
                },
                "required": ["action"]
            }
@@ -1186,6 +1188,21 @@ FUNCTION_TOOL_SCHEMAS = [
            }
        }
    },
+    {
+        "type": "function",
+        "function": {
+            "name": "manage_bg_jobs",
+            "description": "Inspect and control detached background `bash` jobs (started with the `#!bg` marker). action='list' shows this chat's jobs with id/status/age/command; action='output' returns a job's captured output so far (use for a still-running job, or to re-read a finished one); action='kill' terminates a runaway job's process tree instead of waiting out its max-runtime. output and kill need job_id from list.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "action": {"type": "string", "enum": ["list", "output", "kill"], "description": "list | output | kill (default: list)"},
+                    "job_id": {"type": "string", "description": "Background job id (required for output/kill; from action='list')"},
+                },
+                "required": ["action"]
+            }
+        }
+    },
 ]


@@ -1204,23 +1221,26 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
        logger.error(f"Failed to parse function call arguments for {name}: {arguments}")
        return None

+    tool_type = _TOOL_NAME_MAP.get(name, name)
+    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
+                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
+
    # Some models emit valid JSON that isn't an object (e.g. a bare array
-    # ["ls -la"], string, or number) as the function arguments. Every branch
-    # below assumes a dict and calls args.get(...), so a non-dict would raise
-    # AttributeError and abort the whole agent stream. Coerce to {} instead.
+    # ["ls -la"], string, or number) as function arguments. Most local tools keep
+    # the legacy empty-object coercion for stream robustness, but email MCP tools
+    # must fail closed so a malformed call cannot read the default mailbox.
    if not isinstance(args, dict):
+        if tool_type.startswith("mcp__email__") or name in _BUILTIN_EMAIL_TOOLS:
+            logger.warning(f"Non-object email function call arguments for {name}: {args!r}; rejecting")
+            return None
        logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty")
        args = {}

-    tool_type = _TOOL_NAME_MAP.get(name, name)
-
    # Allow MCP tools through (namespaced as mcp__serverid__toolname)
    if tool_type.startswith("mcp__"):
        content = json.dumps(args) if args else "{}"
        return ToolBlock(tool_type, content)
    # Email tools are implemented as MCP — route them to email
-    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
-                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    if name in _BUILTIN_EMAIL_TOOLS:
        return ToolBlock(f"mcp__email__{name}", json.dumps(args) if args else "{}")
    if tool_type not in TOOL_TAGS:
@@ -1386,6 +1406,12 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
        content = json.dumps(args)
    elif tool_type == "ask_teacher":
        content = args.get("model", "auto") + "\n" + args.get("problem", "")
+    elif tool_type == "ask_user":
+        # Keep user-facing labels readable in the tool trace.  The outer SSE
+        # JSON encoder will escape them for transport and JSON.parse restores
+        # them once; pre-escaping here caused literal ``\u00f1`` sequences to
+        # remain visible in the debug panel.
+        content = json.dumps(args, ensure_ascii=False)
    else:
        content = json.dumps(args)

@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
 NON_ADMIN_BLOCKED_TOOLS = {
    "bash",
    "python",
+    "manage_bg_jobs",
    "read_file",
    "write_file",
    "edit_file",
@@ -114,6 +115,8 @@ _PLAN_MODE_KNOWN_MUTATORS = {
    # Shell is never read-only-safe; block it explicitly so it stays out of plan
    # mode even if the schema list fails to load.
    "bash", "python",
+    # Controls shell processes (kill); plan mode can't run bash anyway.
+    "manage_bg_jobs",
 }


@@ -4,6 +4,8 @@ src.constants which imports nothing from src). Adding a project import here
 will reintroduce the circular dependency that this module exists to break.
 """

+import json
+
 from src.constants import MAX_OUTPUT_CHARS

 _mcp_manager = None
@@ -37,3 +39,36 @@ def _truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str:
    if len(text) > limit:
        return text[:limit] + f"\n... (truncated, {len(text)} chars total)"
    return text
+
+
+def _parse_tool_args(content):
+    """Parse a tool-call argument blob into a dict.
+
+    Accepts either a JSON string or an already-decoded dict. Unwraps the
+    common `{"body": {...}}` envelope that smaller models emit when they
+    read tool descriptions like "Body is JSON: {...}" literally and
+    pass `body` as a field name rather than treating it as a noun.
+
+    Args:
+        content: JSON text, a dict, or any other value. An empty or
+            whitespace-only string and any non-str, non-dict value yield ``{}``.
+
+    Returns:
+        The argument dict, with the ``body`` envelope unwrapped if present.
+
+    Raises:
+        ValueError: If ``content`` is a string holding malformed JSON, or
+            valid JSON that is not an object.
+    """
+    if isinstance(content, str):
+        try:
+            args = json.loads(content) if content.strip() else {}
+        except (json.JSONDecodeError, TypeError) as e:
+            raise ValueError(str(e)) from e
+        if not isinstance(args, dict):
+            # Valid JSON that is an array or scalar is still not an argument
+            # object; reject it here so callers never get a non-dict back.
+            raise ValueError(
+                f"tool arguments must be a JSON object, got {type(args).__name__}"
+            )
+    elif isinstance(content, dict):
+        args = content
+    else:
+        args = {}
+    # Unwrap {"body": {...}} envelope, but only if `body` is the sole key
+    # and points at a dict. We don't want to clobber a legitimate `body`
+    # field on tools where it's a real arg (e.g. send_email body text).
+    inner = args.get("body") if len(args) == 1 else None
+    # Extra safety: only unwrap if the inner dict looks like a tool call.
+    if isinstance(inner, dict) and "action" in inner:
+        return inner
+    return args