Merge origin/dev into main

2026-06-22 20:55:29 -04:00 · 2026-06-21 11:08:50 +00:00
parent c504214925 160267417e
commit 75f04bc088
203 changed files with 11283 additions and 1649 deletions
@@ -267,6 +267,10 @@ _DOMAIN_RULES = {
 - Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
 - Use `manage_contact` to list, add, update, or delete contacts in the address book.
 - Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
+    "integrations": """\
+## Integration/API rules
+- To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
+- Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
 }

 _DOMAIN_TOOL_MAP = {
@@ -277,9 +281,10 @@ _DOMAIN_TOOL_MAP = {
    "notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
    "ui": {"ui_control"},
    "sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
-    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
+    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace", "manage_bg_jobs"},
    "settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
    "contacts": {"resolve_contact", "manage_contact"},
+    "integrations": {"api_call"},
 }

 def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -524,7 +529,7 @@ def get_builtin_overrides() -> dict:
        ov = get_setting("builtin_tool_overrides", {})
        return ov if isinstance(ov, dict) else {}
    except Exception as e:
-        logger.warning('Failed to load builtin tool overrides: %s', e)
+        logger.warning("Failed to load builtin tool overrides, using defaults", exc_info=e)
        return {}


@@ -909,10 +914,25 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
        domains.add("sessions")
    if has(r"\b(file|folder|directory|repo|git|grep|find in files|read file|edit file|shell|terminal|bash|python)\b"):
        domains.add("files")
+    # Managing detached bash jobs: "kill the background job", "stop the job",
+    # "kill that job", "check the job output", "is the bg job done".
+    if (has(r"\b(background|bg)\s+(jobs?|task)\b")
+            or has(r"\b(kill|stop|cancel|terminate|check|tail|show|list)\b.{0,16}\bjobs?\b")
+            or has(r"\bjobs?\b.{0,16}\b(output|status|done|finished|running)\b")):
+        domains.add("files")
    if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
        domains.add("settings")
    if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
        domains.add("contacts")
+    # API-integration intent — calling a configured service via the api_call
+    # tool. Without this the #3794 repro ("Use the api_call tool to call Home
+    # Assistant GET /api/states") matched no domain, classified as low-signal,
+    # and the tool never reached the schema filter. Detect it explicitly so the
+    # "integrations" domain seeds api_call deterministically (see
+    # _DOMAIN_TOOL_MAP), independent of embedding retrieval.
+    if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
+           r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
+        domains.add("integrations")

    low_signal = not continuation and not domains
    return {
@@ -941,8 +961,11 @@ def _recent_context_for_retrieval(messages: List[Dict], max_user: int = 3, max_c
        if isinstance(content, list):
            content = " ".join(b.get("text", "") for b in content if isinstance(b, dict))
        content = (content or "").strip()
-        # Skip injected tool-result envelopes — role=user but not human intent.
-        if not content or content.startswith("[Tool execution results]"):
+        # Skip injected envelopes — role=user but not human intent. Tool results
+        # are now wrapped via untrusted_context_message (metadata.trusted=False);
+        # keep the legacy "[Tool execution results]" prefix for older histories.
+        meta = msg.get("metadata") or {}
+        if not content or meta.get("trusted") is False or content.startswith("[Tool execution results]"):
            continue
        collected.append(content)
        if len(collected) >= max_user:
@@ -1030,8 +1053,8 @@ def _build_system_prompt(
    try:
        from src.user_time import current_datetime_context_message
        _datetime_message = current_datetime_context_message()
-    except Exception:
-        pass
+    except Exception as e:
+        logger.warning("Failed to build datetime context message", exc_info=e)

    # Document context is kept as a SEPARATE message (not merged into the tool
    # prompt) so the context trimmer doesn't destroy it when truncating the
@@ -1074,8 +1097,8 @@ def _build_system_prompt(
            try:
                from src.pdf_form_doc import find_source_upload_id
                _is_form_backed = bool(find_source_upload_id(active_document.current_content or ""))
-            except Exception:
-                pass
+            except Exception as e:
+                logger.warning("Failed to detect if document is form-backed, assuming plain", exc_info=e)

            if _is_form_backed:
                doc_ctx = (
@@ -1664,8 +1687,14 @@ def _append_tool_results(
        if round_reasoning:
            msg["reasoning_content"] = round_reasoning
        messages.append(msg)
+        # Tool output (shell/python stdout, file reads, fetched pages, email
+        # bodies, MCP results) is sourced from outside the server. Wrap it as
+        # untrusted data so prompt-injection inside a tool result is treated as
+        # data, not instructions — same hardening as skills (#788) and the
+        # web/RAG context. THREAT_MODEL.md lists tool output as a surface that
+        # must go through untrusted_context_message.
        messages.append(
-            {"role": "user", "content": f"[Tool execution results]\n\n{tool_output_text}"}
+            untrusted_context_message("tool execution results", tool_output_text)
        )


@@ -22,6 +22,9 @@ from .subprocess_tools import BashTool, PythonTool
 from .web_tools import WebSearchTool, WebFetchTool
 from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
 from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
+from .model_interaction_tools import ChatWithModelTool, AskTeacherTool, ListModelsTool
+from .bg_job_tools import ManageBgJobsTool
+from .session_tools import CreateSessionTool, ListSessionsTool, SendToSessionTool, ManageSessionTool

 TOOL_HANDLERS = {
    "bash": BashTool().execute,
@@ -40,6 +43,14 @@ TOOL_HANDLERS = {
    "suggest_document": SuggestDocumentTool().execute,
    "manage_documents": ManageDocumentTool().execute,
    "get_workspace": GetWorkspaceTool().execute,
+    "chat_with_model": ChatWithModelTool().execute,
+    "ask_teacher": AskTeacherTool().execute,
+    "list_models": ListModelsTool().execute,
+    "manage_bg_jobs": ManageBgJobsTool().execute,
+    "create_session": CreateSessionTool().execute,
+    "list_sessions": ListSessionsTool().execute,
+    "send_to_session": SendToSessionTool().execute,
+    "manage_session": ManageSessionTool().execute,
 }

 # ---------------------------------------------------------------------------
@@ -52,7 +63,7 @@ PYTHON_TIMEOUT = 30

 # Tool types that trigger execution
 TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
-             "grep", "glob", "ls", "get_workspace",
+             "grep", "glob", "ls", "get_workspace", "manage_bg_jobs",
             "create_document", "update_document", "edit_document",
             "search_chats",
             "chat_with_model", "create_session", "list_sessions",
@@ -0,0 +1,98 @@
+"""Agent tool to inspect and control detached background `bash` jobs.
+
+`bash` blocks prefixed with a `#!bg` marker run detached via `src.bg_jobs`; the
+agent is auto-re-invoked with the output when they finish. This tool covers the
+gaps in that flow: list the jobs in the current chat, read a still-running job's
+output on demand, and kill a runaway job instead of waiting out its max-runtime.
+
+Registry tool (`TOOL_HANDLERS["manage_bg_jobs"]`). Jobs are scoped to the chat
+that launched them, so every action requires the caller's `session_id` and a job
+from another session is treated as not found.
+"""
+
+import json
+import time
+from typing import Any, Dict, List
+
+# Accepted spellings for each action. ManageBgJobsTool.execute lower-cases and
+# strips the caller's `action` argument before testing membership in these sets.
+_LIST_ACTIONS = {"list", "ls", "jobs"}
+# Read-style aliases: each returns the job's status, command and recorded output.
+_OUTPUT_ACTIONS = {"output", "get", "read", "tail", "status", "show"}
+# Kill-style aliases: each ends a still-running job through bg_jobs.kill.
+_KILL_ACTIONS = {"kill", "stop", "cancel", "terminate"}
+
+
+def _age(rec: Dict[str, Any]) -> str:
+    """Render the time elapsed since ``rec["started_at"]`` as a compact string.
+
+    Returns "?" when the record has no (truthy) start time; otherwise
+    "<n>s" under a minute, "<n>m" under an hour, and "<h>h<m>m" beyond that.
+    """
+    started = rec.get("started_at")
+    if not started:
+        return "?"
+    elapsed = int(time.time() - started)
+    if elapsed >= 3600:
+        hours, remainder = divmod(elapsed, 3600)
+        return f"{hours}h{remainder // 60}m"
+    if elapsed >= 60:
+        return f"{elapsed // 60}m"
+    return f"{elapsed}s"
+
+
+def _status_label(rec: Dict[str, Any]) -> str:
+    """Return a short human-readable status for a job record.
+
+    The terminal-cause flags win over the raw status, checked in the order
+    killed, timed_out, died. A "done"/"failed" status gets the exit code
+    appended; any other status is returned unchanged ("?" when absent).
+    """
+    for flag, label in (("killed", "killed"), ("timed_out", "timed out"), ("died", "died")):
+        if rec.get(flag):
+            return label
+    state = rec.get("status", "?")
+    if state in ("done", "failed"):
+        return f"{state} (exit {rec.get('exit_code')})"
+    return state
+
+
+def _row(rec: Dict[str, Any]) -> str:
+    """Format one job record as a ``[id] status | age | command`` line.
+
+    Only the first line of the (stripped) command is shown, clipped to 80
+    characters. A record without a command renders an empty command field.
+    """
+    # splitlines() on an empty or blank command yields [], so indexing [0]
+    # unguarded would raise IndexError and break the whole job listing.
+    cmd_lines = (rec.get("command") or "").strip().splitlines()
+    cmd = cmd_lines[0][:80] if cmd_lines else ""
+    return f"[{rec.get('id')}] {_status_label(rec)} | {_age(rec)} | {cmd}"
+
+
+class ManageBgJobsTool:
+    """Registry handler for `manage_bg_jobs`: list, read, or kill background jobs."""
+
+    async def execute(self, content: str, ctx: dict) -> dict:
+        """Run one action against the calling chat's background jobs.
+
+        `content` is a JSON object with an optional `action` (default "list")
+        and, for output/kill actions, a `job_id` (alias `id`). Content that is
+        empty, unparseable, or not a JSON object is treated as no arguments.
+        `ctx` must carry the caller's `session_id`; jobs recorded under a
+        different session are reported as not found.
+
+        Returns a dict with `exit_code` plus `output` on success or `error`
+        on failure.
+        """
+        from src import bg_jobs
+
+        session_id = ctx.get("session_id")
+        raw = (content or "").strip()
+        try:
+            args = json.loads(raw) if raw else {}
+        except (ValueError, TypeError):
+            args = {}
+        if not isinstance(args, dict):
+            args = {}
+        action = str(args.get("action", "list")).strip().lower()
+        job_id = str(args.get("job_id") or args.get("id") or "").strip()
+
+        if not session_id:
+            return {"error": "manage_bg_jobs: no active chat session; background jobs are scoped to a chat.", "exit_code": 1}
+
+        if action in _LIST_ACTIONS:
+            jobs: List[Dict[str, Any]] = bg_jobs.list_for_session(session_id)
+            if not jobs:
+                return {"output": "No background jobs in this chat.", "exit_code": 0}
+            # Newest first; records without a start time sort last.
+            jobs.sort(key=lambda r: r.get("started_at") or 0, reverse=True)
+            lines = "\n".join(_row(r) for r in jobs)
+            return {"output": f"{len(jobs)} background job(s):\n{lines}", "exit_code": 0}
+
+        if action in _OUTPUT_ACTIONS or action in _KILL_ACTIONS:
+            if not job_id:
+                return {"error": f"manage_bg_jobs: action '{action}' requires a job_id (see action='list').", "exit_code": 1}
+            rec = bg_jobs.get(job_id)
+            # Scope: only the chat that launched a job may see or control it.
+            if rec is None or rec.get("session_id") != session_id:
+                return {"error": f"manage_bg_jobs: no background job '{job_id}' in this chat.", "exit_code": 1}
+
+            if action in _KILL_ACTIONS:
+                if rec.get("status") != "running":
+                    return {"output": f"Job `{job_id}` already {_status_label(rec)}; nothing to kill.", "exit_code": 0}
+                killed = bg_jobs.kill(job_id)
+                # The value returned by kill() may be empty or lack a command;
+                # splitlines() then yields [] and an unguarded [0] would raise
+                # IndexError after the job had already been killed.
+                cmd_lines = ((killed or {}).get("command") or "").splitlines()
+                cmd = cmd_lines[0][:80] if cmd_lines else ""
+                return {"output": f"Killed background job `{job_id}` ({cmd}).", "exit_code": 0}
+
+            out = rec.get("output") or "(no output yet)"
+            return {
+                "output": f"Job `{job_id}` [{_status_label(rec)}, {_age(rec)}]\nCommand: {rec.get('command')}\n\nOutput:\n{out}",
+                "exit_code": 0,
+            }
+
+        return {"error": f"manage_bg_jobs: unknown action '{action}'. Use list, output, or kill.", "exit_code": 1}
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import os
+import re
 import difflib
 import fnmatch
 import shutil
@@ -16,6 +17,31 @@ _CODENAV_SKIP_DIRS = frozenset({
 _CODENAV_MAX_HITS = 200
 _CODENAV_MAX_LINE = 400

+
+def _glob_to_regex(pat: str) -> "re.Pattern":
+    """Translate a forward-slash glob into a compiled regex.
+
+    Supported syntax:
+      `**/`     zero or more complete directories
+      `**`      anything, including `/`
+      `*`       any run of characters within one path segment
+      `?`       exactly one character within a path segment
+      `[seq]`   one character from `seq` (ranges like `a-z` allowed)
+      `[!seq]`  one character not in `seq`; never matches `/`
+    An unterminated or invalid `[` is matched literally. Every other
+    character matches itself. The result is meant to be used with
+    `fullmatch`.
+    """
+    i, n, out = 0, len(pat), []
+    while i < n:
+        ch = pat[i]
+        if pat.startswith("**/", i):
+            out.append("(?:[^/]+/)*")
+            i += 3
+        elif pat.startswith("**", i):
+            out.append(".*")
+            i += 2
+        elif ch == "*":
+            out.append("[^/]*")
+            i += 1
+        elif ch == "?":
+            out.append("[^/]")
+            i += 1
+        elif ch == "[":
+            # Locate the closing bracket. As in fnmatch, a leading "!" negates
+            # and a "]" directly after the opener (or the "!") is a member.
+            j = i + 1
+            if j < n and pat[j] == "!":
+                j += 1
+            if j < n and pat[j] == "]":
+                j += 1
+            while j < n and pat[j] != "]":
+                j += 1
+            cls = None
+            if j < n:
+                body = pat[i + 1 : j]
+                negate = body.startswith("!")
+                if negate:
+                    body = body[1:]
+                # Escape what would otherwise be regex syntax inside a set.
+                body = re.sub(r"([\\\[\]^])", r"\\\1", body)
+                # The lookahead keeps a class from ever consuming a separator.
+                cls = "(?!/)[" + ("^" if negate else "") + body + "]"
+                try:
+                    re.compile(cls)
+                except re.error:
+                    cls = None  # e.g. a reversed range such as [z-a]
+            if cls is None:
+                out.append(re.escape(ch))
+                i += 1
+            else:
+                out.append(cls)
+                i = j + 1
+        else:
+            out.append(re.escape(ch))
+            i += 1
+    return re.compile("".join(out))
+
 def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
    if old == new:
        return None
@@ -259,23 +285,38 @@ class GlobTool:
            return {"error": f"glob: {e}", "exit_code": 1}

        def _glob():
-            from pathlib import Path
-            base = Path(root)
-            if not base.is_dir():
+            base = os.path.abspath(root)
+            if not os.path.isdir(base):
                return None, f"glob: {root}: not a directory"
+            norm_pat = pattern.replace("\\", "/")
+            # Fast path: literal pattern (no wildcards) → direct path lookup.
+            if not any(c in norm_pat for c in "*?["):
+                cand = os.path.normpath(os.path.join(base, norm_pat))
+                if os.path.exists(cand):
+                    return [cand], None
+                # Literal not at exact path — fall through to walk so
+                # e.g. "foo.py" still matches at any depth (like rglob).
+            # Compile glob to regex: * stays within one segment, **/ spans dirs.
+            regex = _glob_to_regex(norm_pat)
            matched = []
+            cap = _CODENAV_MAX_HITS * 5
            try:
-                for p in base.rglob(pattern):
-                    if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
-                        continue
-                    try:
-                        mtime = p.stat().st_mtime
-                    except OSError:
-                        mtime = 0
-                    matched.append((mtime, str(p)))
-                    if len(matched) > _CODENAV_MAX_HITS * 5:
+                for dp, dns, fns in os.walk(base):
+                    # Prune skipped dirs before descending (unlike rglob which
+                    # descends first then filters — fatal on large node_modules).
+                    dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
+                    for name in fns + dns:
+                        full = os.path.join(dp, name)
+                        rel = os.path.relpath(full, base).replace(os.sep, "/")
+                        if regex.fullmatch(rel) or regex.fullmatch(name):
+                            try:
+                                mtime = os.stat(full).st_mtime
+                            except OSError:
+                                mtime = 0
+                            matched.append((mtime, full))
+                    if len(matched) > cap:
                        break
-            except (OSError, ValueError) as _e:
+            except OSError as _e:
                return None, f"glob: {_e}"
            matched.sort(key=lambda t: t[0], reverse=True)
            return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
@@ -0,0 +1,208 @@
+"""model_interaction_tools.py - agent tools for talking to other models.
+
+Owns the model-interaction tool implementations (chat_with_model, ask_teacher,
+list_models) and their handler classes, registered in ``TOOL_HANDLERS``. Part
+of the tool -> registry migration (#3629): the implementations were moved here
+out of ``src.ai_interaction`` so dispatch flows through the registry instead of
+the elif chain / dispatch_ai_tool in tool_execution.py.
+
+Shared helpers that still live in ``src.ai_interaction`` and are used by tools
+not yet migrated (``_resolve_model``, ``AI_CHAT_TIMEOUT``) are imported lazily
+inside the functions to avoid an import cycle at module load.
+"""
+import logging
+from typing import Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+# System message sent ahead of the user's problem in ask_teacher(). The text is
+# part of the runtime prompt, so edits here change the teacher model's behavior.
+_TEACHER_SYSTEM_PROMPT = (
+    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
+    "Provide clear, actionable guidance:\n"
+    "1. Brief analysis of the problem\n"
+    "2. Recommended approach (step by step)\n"
+    "3. Key things to watch out for\n\n"
+    "Be concise and practical. No preamble."
+)
+
+
+async def chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Send one message to a named model and return its reply.
+
+    Content format:
+      Line 1: model_name (or model_name@endpoint_name)
+      Line 2+: the message to send
+
+    Returns {"model", "response"} on success (the response is clipped to
+    10000 characters) or {"error": ...} when the input is malformed, the
+    model cannot be resolved, or the call fails.
+    """
+    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
+    from src.llm_core import llm_call_async
+
+    # Everything before the first newline names the model; the rest is the message.
+    head, _, tail = content.strip().partition("\n")
+    model_spec = head.strip()
+    if not model_spec:
+        return {"error": "First line must be the model name"}
+
+    message = tail.strip()
+    if not message:
+        return {"error": "No message provided (line 2+ is the message)"}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    limit = 10000
+    conversation = [{"role": "user", "content": message}]
+    try:
+        response = await llm_call_async(
+            url,
+            model,
+            conversation,
+            headers=headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+        # Keep very long replies from flooding the tool output.
+        if len(response) > limit:
+            response = response[:limit] + "\n... (truncated)"
+        return {"model": model, "response": response}
+    except Exception as e:
+        logger.error(f"chat_with_model failed: {e}")
+        return {"error": f"Failed to get response from {model_spec}: {e}"}
+
+
+async def ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Ask a more capable model for guidance on a problem.
+
+    Content format:
+      Line 1: model_name (or 'auto')
+      Line 2+: the problem description
+
+    With 'auto' (or an empty first line) the model comes from the
+    `teacher_model` setting. Returns {"model", "response", "teacher": True}
+    on success (the response is clipped to 8000 characters) or
+    {"error": ...} otherwise.
+    """
+    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
+    from src.llm_core import llm_call_async
+    from src.settings import get_setting
+
+    head, _, tail = content.strip().partition("\n")
+    model_spec = head.strip()
+    problem = tail.strip()
+    if not problem:
+        return {"error": "No problem description provided"}
+
+    wants_default = model_spec.lower() in ("auto", "")
+    if wants_default:
+        model_spec = get_setting("teacher_model", "")
+        if not model_spec:
+            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    limit = 8000
+    prompt = [
+        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
+        {"role": "user", "content": f"Problem:\n{problem}"},
+    ]
+    try:
+        response = await llm_call_async(
+            url,
+            model,
+            prompt,
+            headers=headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+        if len(response) > limit:
+            response = response[:limit] + "\n... (truncated)"
+        return {"model": model, "response": response, "teacher": True}
+    except Exception as e:
+        logger.error(f"ask_teacher failed: {e}")
+        return {"error": f"Teacher call failed ({model_spec}): {e}"}
+
+
+async def list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """List all available models across configured endpoints.
+
+    Content = optional filter keyword (case-insensitive). A model is kept
+    when the keyword occurs in its id or in the endpoint's name.
+
+    Returns {"results": <markdown text>} on success, including the
+    "nothing configured" / "nothing matched" cases, or {"error": <message>}
+    when the lookup itself fails.
+    """
+    import json
+    import httpx
+    from src.database import SessionLocal, ModelEndpoint
+    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
+    from src.auth_helpers import owner_filter
+    from src.endpoint_resolver import resolve_endpoint_runtime, build_headers, build_models_url
+
+    # Tolerate None content like the other registry tools do.
+    keyword = (content or "").strip().lower() or None
+
+    db = SessionLocal()
+    try:
+        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+        if owner:
+            query = owner_filter(query, ModelEndpoint, owner)
+        endpoints = query.all()
+        if not endpoints:
+            return {"results": "No enabled model endpoints configured."}
+
+        result_lines = []
+        total_models = 0
+
+        # Probe endpoints through an async client: a blocking httpx.get inside
+        # this coroutine would stall the event loop for up to the timeout on
+        # every slow or offline endpoint.
+        async with httpx.AsyncClient(timeout=5) as client:
+            for ep in endpoints:
+                try:
+                    base, api_key = resolve_endpoint_runtime(ep, owner=owner)
+                except Exception:
+                    # Best effort: an endpoint that cannot be resolved is left
+                    # out of the listing, but leave a trace for debugging.
+                    logger.debug("list_models: skipping unresolvable endpoint %r", ep.name, exc_info=True)
+                    continue
+                provider = _detect_provider(base)
+                headers = build_headers(api_key, base)
+
+                model_ids = []
+                if provider == "anthropic":
+                    model_ids = list(ANTHROPIC_MODELS)
+                else:
+                    try:
+                        models_url = build_models_url(base)
+                        if models_url:
+                            r = await client.get(models_url, headers=headers)
+                            r.raise_for_status()
+                            data = r.json()
+                            # Try a "data" list of {"id": ...} entries first,
+                            # then a "models" list keyed by "name" or "model".
+                            model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+                            if not model_ids:
+                                model_ids = [
+                                    m.get("name") or m.get("model")
+                                    for m in (data.get("models") or [])
+                                    if m.get("name") or m.get("model")
+                                ]
+                        else:
+                            model_ids = json.loads(ep.cached_models or "[]")
+                    except Exception:
+                        logger.debug("list_models: model probe failed for %r", ep.name, exc_info=True)
+                        model_ids = ["(endpoint offline)"]
+
+                if keyword:
+                    name_hit = keyword in (ep.name or "").lower()
+                    model_ids = [m for m in model_ids if name_hit or keyword in m.lower()]
+
+                if model_ids:
+                    result_lines.append(f"\n**{ep.name or base}** ({provider}):")
+                    for mid in model_ids:
+                        result_lines.append(f"  - `{mid}`")
+                        total_models += 1
+
+        if not result_lines:
+            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
+
+        # Each endpoint heading already starts with "\n", so it follows the
+        # header on a fresh line.
+        header = f"Available models ({total_models} total):"
+        return {"results": header + "\n".join(result_lines)}
+    except Exception as e:
+        logger.error("list_models failed: %s", e)
+        return {"error": str(e)}
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# Handler classes registered in TOOL_HANDLERS
+# ---------------------------------------------------------------------------
+
+class ChatWithModelTool:
+    """Registry adapter that forwards a tool call to chat_with_model()."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Read session_id and owner from ctx and delegate."""
+        session_id, owner = ctx.get("session_id"), ctx.get("owner")
+        return await chat_with_model(content, session_id, owner=owner)
+
+
+class AskTeacherTool:
+    """Registry adapter that forwards a tool call to ask_teacher()."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Read session_id and owner from ctx and delegate."""
+        session_id, owner = ctx.get("session_id"), ctx.get("owner")
+        return await ask_teacher(content, session_id, owner=owner)
+
+
+class ListModelsTool:
+    """Registry adapter that forwards a tool call to list_models()."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Read session_id and owner from ctx and delegate."""
+        session_id, owner = ctx.get("session_id"), ctx.get("owner")
+        return await list_models(content, session_id, owner=owner)
@@ -0,0 +1,464 @@
+"""session_tools.py - agent tools for AI-to-AI session management.
+
+Owns create_session, list_sessions, send_to_session and manage_session, moved
+out of src.ai_interaction as part of the tool -> registry migration (#3629), and
+their handler classes registered in TOOL_HANDLERS.
+
+The session manager is a runtime-set singleton in src.ai_interaction, so each
+function fetches it via get_session_manager() (imported here); _resolve_model and
+AI_CHAT_TIMEOUT are reused from there too.
+"""
+import json
+import logging
+import uuid
+from typing import Dict, Optional
+
+from src.ai_interaction import get_session_manager, _resolve_model, AI_CHAT_TIMEOUT
+
+logger = logging.getLogger(__name__)
+
+
+async def create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Create a new chat session bound to a resolved model.
+
+    Content format:
+      Line 1: session name
+      Line 2: model_name (or model_name@endpoint_name)
+
+    Returns {"session_id", "name", "model", "endpoint_url"} on success or
+    {"error": ...} when the input is malformed, the model cannot be
+    resolved, or the session manager rejects the request.
+    """
+    manager = get_session_manager()
+    if not manager:
+        return {"error": "Session manager not available"}
+
+    parts = content.strip().split("\n")
+    if len(parts) < 2:
+        return {"error": "Need 2 lines: session name, then model spec"}
+
+    name, model_spec = parts[0].strip(), parts[1].strip()
+    if not name:
+        return {"error": "Session name cannot be empty"}
+
+    try:
+        url, model, headers = _resolve_model(model_spec, owner=owner)
+    except ValueError as e:
+        return {"error": str(e)}
+
+    # Short id: the first 8 characters of a random UUID4 string.
+    sid = str(uuid.uuid4())[:8]
+    try:
+        manager.create_session(
+            session_id=sid,
+            name=name,
+            endpoint_url=url,
+            model=model,
+            rag=False,
+            owner=owner,
+        )
+        # Keep the resolved headers on the session so later calls can reuse them.
+        created = manager.get_session(sid)
+        if created and headers:
+            created.headers = headers
+        # Event dispatch is best effort; a failure must not undo the creation.
+        try:
+            from src.event_bus import fire_event
+            fire_event("session_created", owner)
+        except Exception:
+            logger.debug("session_created event dispatch failed", exc_info=True)
+
+        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
+    except Exception as e:
+        logger.error(f"create_session failed: {e}")
+        return {"error": f"Failed to create session: {e}"}
+
+async def list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """List sessions sorted by most-recently-active first.
+
+    Output includes a relative "last active" timestamp per row so the
+    agent can answer "open my last chat" without guessing from titles.
+    The most-recent session is always first in the list.
+
+    Content = optional filter keyword (matches session name).
+
+    Returns {"results": <markdown text>} (at most 50 rows are rendered) or
+    {"error": <message>} when the lookup fails.
+    """
+    _session_manager = get_session_manager()
+    if not _session_manager:
+        return {"error": "Session manager not available"}
+
+    # Tolerate None content like the other registry tools do.
+    keyword = (content or "").strip().lower() or None
+
+    try:
+        from core.database import SessionLocal, Session as DbSession
+        from datetime import datetime, timezone
+
+        # Pull every session's last_accessed from the DB so we can sort
+        # by recency. In-memory sessions hold name + model + msg_count;
+        # the DB row holds the timestamps.
+        db = SessionLocal()
+        try:
+            db_rows = {r.id: r for r in db.query(DbSession).all()}
+        finally:
+            db.close()
+
+        # SECURITY: scope to the caller's sessions. Passing None returned
+        # every user's sessions, which the agent tool then exposed via the
+        # "list my chats" reply.
+        sessions = _session_manager.get_sessions_for_user(owner)
+        rows = []
+        for sid, sess in sessions.items():
+            if keyword and keyword not in (sess.name or "").lower():
+                continue
+            db_row = db_rows.get(sid)
+            # Prefer last_accessed; fall back to updated_at, then created_at.
+            ts = None
+            if db_row:
+                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
+            rows.append((ts, sid, sess))
+
+        def _as_utc(ts):
+            """Read a naive timestamp as UTC so naive and aware values are comparable."""
+            return ts if ts.tzinfo is not None else ts.replace(tzinfo=timezone.utc)
+
+        def _recency(row):
+            """Sort key: epoch seconds, with missing/unusable timestamps lowest."""
+            ts = row[0]
+            if not ts:
+                return float("-inf")
+            try:
+                return _as_utc(ts).timestamp()
+            except Exception:
+                return float("-inf")
+
+        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
+        # Comparing numeric keys avoids the TypeError raised when a naive
+        # placeholder such as datetime.min meets a tz-aware timestamp.
+        rows.sort(key=_recency, reverse=True)
+
+        def _rel(ts):
+            if not ts:
+                return 'never'
+            try:
+                diff = (datetime.now(timezone.utc) - _as_utc(ts)).total_seconds()
+            except Exception:
+                return 'unknown'
+            if diff < 60:
+                return 'just now'
+            if diff < 3600:
+                return f'{int(diff / 60)}m ago'
+            if diff < 86400:
+                return f'{int(diff / 3600)}h ago'
+            if diff < 86400 * 7:
+                return f'{int(diff / 86400)}d ago'
+            return ts.strftime('%Y-%m-%d')
+
+        lines = []
+        for i, (ts, sid, sess) in enumerate(rows):
+            if i >= 50:
+                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
+                break
+            # Escape brackets so a session title cannot break the markdown link.
+            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
+            msg_count = getattr(sess, "message_count", 0) or 0
+            model = getattr(sess, "model", "unknown")
+            marker = " ← most recent" if i == 0 else ""
+            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
+
+        if not lines:
+            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
+
+        return {
+            "results": (
+                f"Found {len(rows)} session(s), sorted most-recent first:\n"
+                + "\n".join(lines)
+                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
+            )
+        }
+    except Exception as e:
+        logger.error("list_sessions failed: %s", e)
+        return {"error": str(e)}
+
+async def send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Post a message into an existing session and return the model's reply.
+
+    Content format:
+      Line 1: session_id
+      Line 2+: message
+
+    On success both the message and the reply are appended to the target
+    session, and {"session_id", "session_name", "response"} is returned with
+    the response clipped to 10000 characters. Any failure yields
+    {"error": ...}; a session owned by someone else is reported as not found.
+    """
+    manager = get_session_manager()
+    from src.llm_core import llm_call_async
+    from core.models import ChatMessage
+
+    if not manager:
+        return {"error": "Session manager not available"}
+
+    head, newline, tail = content.strip().partition("\n")
+    if not newline:
+        return {"error": "Need 2 lines: session_id, then message"}
+
+    target_sid = head.strip()
+    message = tail.strip()
+
+    target = manager.get_session(target_sid)
+    if not target:
+        return {"error": f"Session '{target_sid}' not found"}
+
+    # Owner-scope: answer exactly as for a missing session so another user's
+    # session id is not confirmed to exist.
+    session_owner = getattr(target, "owner", None)
+    if owner and session_owner and target.owner != owner:
+        return {"error": f"Session '{target_sid}' not found"}
+
+    if not message:
+        return {"error": "No message provided"}
+
+    limit = 10000
+    try:
+        # Prior history plus the new user turn.
+        history = target.get_context_messages()
+        history.append({"role": "user", "content": message})
+
+        response = await llm_call_async(
+            target.endpoint_url,
+            target.model,
+            history,
+            headers=target.headers,
+            timeout=AI_CHAT_TIMEOUT,
+        )
+
+        # Record the exchange only after the call succeeded; the full reply is
+        # stored, only the tool output below is clipped.
+        target.add_message(ChatMessage("user", message))
+        target.add_message(ChatMessage("assistant", response))
+
+        if len(response) > limit:
+            response = response[:limit] + "\n... (truncated)"
+
+        return {
+            "session_id": target_sid,
+            "session_name": target.name,
+            "response": response,
+        }
+    except Exception as e:
+        logger.error(f"send_to_session failed: {e}")
+        return {"error": f"Failed to send to session: {e}"}
+
+async def manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
+    """Manage chat sessions: list, open, rename, archive, delete, star, truncate, fork.
+
+    Two payload formats are accepted.
+
+    JSON object (sent by native function-calling models):
+      {"action": "...", "session_id": "...", "value": "..."}
+      `session` / `id` are aliases for `session_id`; `name`, `new_name`,
+      `title` or `keep_count` are used when `value` is absent; `filter` is
+      the keyword for `list`.
+
+    Legacy line format (sent by fenced-block callers):
+      Line 1: action (list|switch|rename|archive|unarchive|delete|important|unimportant|truncate|fork)
+      Line 2: target session_id (or "current" to use the active session)
+      Line 3: action-specific param (new name for rename, keep_count for truncate/fork)
+      For `list`, everything after line 1 is the filter keyword.
+
+    Args:
+        content: Raw tool payload in either format above.
+        session_id: Id of the calling session; resolves "current" and
+            blocks deleting the chat the caller is running in.
+        owner: Caller identity. When not None, database lookups are
+            restricted to sessions with this owner.
+
+    Returns:
+        A dict with a human-readable ``results`` string (plus action
+        details) on success, or ``{"error": ...}``. Failures are returned,
+        never raised.
+    """
+    _session_manager = get_session_manager()
+    if not _session_manager:
+        return {"error": "Session manager not available"}
+
+    from src.database import SessionLocal, Session as DbSession
+
+    # Accept BOTH the structured JSON args the tool schema advertises
+    # ({action, session_id, value}) AND the legacy line-based format
+    # (line1=action, line2=session_id, line3=value). Native function-calling
+    # models send JSON; fenced-block callers send lines.
+    _raw = (content or "").strip()
+    action = ""
+    target_sid = ""
+    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
+    _list_filter = ""
+    _parsed = None
+    if _raw.startswith("{"):
+        try:
+            _parsed = json.loads(_raw)
+        except Exception:
+            # Looked like JSON but is not; fall through to the line format.
+            _parsed = None
+    if isinstance(_parsed, dict):
+        action = str(_parsed.get("action") or "").strip().lower()
+        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
+        _v = _parsed.get("value")
+        if _v is None:
+            _v = (_parsed.get("name") or _parsed.get("new_name")
+                  or _parsed.get("title") or _parsed.get("keep_count"))
+        value = None if _v is None else str(_v).strip()
+        _list_filter = str(_parsed.get("filter") or "").strip()
+    else:
+        lines = _raw.split("\n")
+        if not lines or not lines[0].strip():
+            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
+        action = lines[0].strip().lower()
+        target_sid = lines[1].strip() if len(lines) >= 2 else ""
+        value = lines[2].strip() if len(lines) >= 3 else None
+        _list_filter = "\n".join(lines[1:]).strip()
+
+    if not action:
+        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
+
+    # `list` alias - dispatch to list_sessions so the agent's natural
+    # first guess (every other manage_* tool has a `list` action) works.
+    if action == "list":
+        return await list_sessions(_list_filter, session_id, owner=owner)
+
+    if not target_sid:
+        return {"error": "Need a session_id (or 'current' for the active chat)"}
+
+    # Allow "current" to refer to the active session. Without an active
+    # session there is nothing to resolve it to, so say so explicitly
+    # instead of reporting a session literally named 'current' as missing.
+    if target_sid.lower() == "current":
+        if not session_id:
+            return {"error": "No active session is known, so 'current' cannot be resolved. Pass an explicit session_id."}
+        target_sid = session_id
+
+    def _session_query(db):
+        # Look the target up by id, scoped to the caller's sessions when an
+        # owner is supplied.
+        query = db.query(DbSession).filter(DbSession.id == target_sid)
+        if owner is not None:
+            query = query.filter(DbSession.owner == owner)
+        return query
+
+    # `switch` / `open` / `select` / `view` - the agent reaches for
+    # these when the user asks to "open" or "switch to" a session.
+    # There's no server-side way to make the browser navigate, so we
+    # just return a clickable anchor link the user can click. The
+    # frontend's chat-history click delegate routes `#session-<id>`
+    # to selectSession(). The agent's reply naturally embeds this
+    # result so the user sees a single clickable line.
+    if action in ("switch", "open", "select", "view"):
+        db = SessionLocal()
+        try:
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            name = db_sess.name or target_sid
+        finally:
+            db.close()
+        return {
+            "action": action,
+            "session_id": target_sid,
+            "name": name,
+            "results": f"[{name}](#session-{target_sid}) - click to open.",
+        }
+
+    db = SessionLocal()
+    try:
+        if action == "rename":
+            if not value:
+                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
+            new_name = value
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.name = new_name
+            db.commit()
+            _session_manager.update_session_name(target_sid, new_name)
+            return {"action": "rename", "session_id": target_sid, "name": new_name,
+                    "results": f"Session renamed to '{new_name}'"}
+
+        elif action == "archive":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.archived = True
+            db.commit()
+            return {"action": "archive", "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' archived"}
+
+        elif action == "unarchive":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            db_sess.archived = False
+            db.commit()
+            return {"action": "unarchive", "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' unarchived"}
+
+        elif action == "delete":
+            if target_sid == session_id:
+                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
+            if db_sess.is_important:
+                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
+            # Read the label before deleting so the success message never
+            # touches a row that no longer exists.
+            deleted_label = db_sess.name or target_sid
+            try:
+                ok = _session_manager.delete_session(target_sid)
+            except Exception as e:
+                return {"error": f"Failed to delete session: {e}"}
+            if not ok:
+                return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
+            return {"action": "delete", "session_id": target_sid,
+                    "results": f"Session '{deleted_label}' deleted"}
+
+        elif action in ("important", "unimportant"):
+            is_important = action == "important"
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            # Prevent AI from unstarring sessions - only the user can do that manually
+            if not is_important and db_sess.is_important:
+                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
+            db_sess.is_important = is_important
+            db.commit()
+            status = "marked as important" if is_important else "unmarked as important"
+            return {"action": action, "session_id": target_sid,
+                    "results": f"Session '{db_sess.name}' {status}"}
+
+        elif action == "truncate":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            keep_count = 10  # default when no count is supplied
+            if value:
+                # Truncation discards history, so a count we cannot read must
+                # stop the action rather than silently fall back to the
+                # default and drop messages the caller never asked to drop.
+                try:
+                    keep_count = int(value)
+                except ValueError:
+                    return {"error": f"truncate needs an integer keep_count, got '{value}'"}
+                if keep_count < 0:
+                    return {"error": f"truncate keep_count must be 0 or greater, got {keep_count}"}
+            success = _session_manager.truncate_messages(target_sid, keep_count)
+            if success:
+                return {"action": "truncate", "session_id": target_sid,
+                        "results": f"Session truncated to last {keep_count} messages"}
+            return {"error": f"Failed to truncate session '{target_sid}'"}
+
+        elif action == "fork":
+            db_sess = _session_query(db).first()
+            if not db_sess:
+                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
+            keep_count = 0  # 0 = all messages
+            if value:
+                try:
+                    keep_count = int(value)
+                except ValueError:
+                    # Forking is non-destructive, so an unreadable count
+                    # deliberately falls back to copying every message.
+                    keep_count = 0
+
+            source = _session_manager.get_session(target_sid)
+            if not source:
+                return {"error": f"Session '{target_sid}' not found"}
+
+            new_sid = str(uuid.uuid4())[:8]
+            _session_manager.create_session(
+                session_id=new_sid,
+                name=f"Fork: {source.name}",
+                endpoint_url=source.endpoint_url,
+                model=source.model,
+                rag=False,
+                owner=owner,
+            )
+            new_sess = _session_manager.get_session(new_sid)
+            if not new_sess:
+                return {"error": f"Failed to fork session '{target_sid}': new session {new_sid} could not be loaded"}
+            # Carry over the source's endpoint headers, when it has any, so
+            # the fork can reach the same endpoint as the original.
+            source_headers = getattr(source, "headers", None)
+            if source_headers:
+                new_sess.headers = dict(source_headers)
+            # Copy messages; a positive keep_count keeps only the first
+            # keep_count messages of the source.
+            history = source.get_context_messages()
+            if keep_count > 0:
+                history = history[:keep_count]
+            from core.models import ChatMessage as InMemoryMsg
+            for msg in history:
+                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
+            try:
+                from src.event_bus import fire_event
+                fire_event("session_created", owner)
+            except Exception:
+                # Best-effort notification; the fork itself already succeeded.
+                logger.debug("session_created event dispatch failed", exc_info=True)
+
+            return {"action": "fork", "session_id": new_sid,
+                    "source_session": target_sid, "messages_copied": len(history),
+                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
+
+        else:
+            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
+    except Exception as e:
+        logger.error("manage_session failed: %s", e)
+        return {"error": str(e)}
+    finally:
+        db.close()
+
+
+# ---------------------------------------------------------------------------
+# Handler classes registered in TOOL_HANDLERS
+# ---------------------------------------------------------------------------
+
+class CreateSessionTool:
+    """TOOL_HANDLERS adapter that forwards a call to ``create_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``create_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await create_session(content, active_sid, owner=caller)
+
+
+class ListSessionsTool:
+    """TOOL_HANDLERS adapter that forwards a call to ``list_sessions``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``list_sessions`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await list_sessions(content, active_sid, owner=caller)
+
+
+class SendToSessionTool:
+    """TOOL_HANDLERS adapter that forwards a call to ``send_to_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``send_to_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await send_to_session(content, active_sid, owner=caller)
+
+
+class ManageSessionTool:
+    """TOOL_HANDLERS adapter that forwards a call to ``manage_session``."""
+
+    async def execute(self, content: str, ctx: dict) -> Dict:
+        """Run ``manage_session`` with the session id and owner read from ``ctx``."""
+        active_sid = ctx.get("session_id")
+        caller = ctx.get("owner")
+        return await manage_session(content, active_sid, owner=caller)
@@ -79,13 +79,23 @@ class WebSearchTool:
 class WebFetchTool:
    async def execute(self, content: str, ctx: dict) -> dict:
        from src.search.content import fetch_webpage_content
+        from src.constants import WEB_FETCH_HARD_MAX_BYTES
        raw = content.strip()
        url = ""
+        max_bytes = None
        if raw.startswith("{"):
            try:
                parsed = json.loads(raw)
                if isinstance(parsed, dict):
                    url = str(parsed.get("url") or "").strip()
+                    # Download-budget override (#3812): "full": true raises the
+                    # budget to the hard cap; an explicit max_bytes is clamped
+                    # to the hard cap downstream. Default stays the soft cap.
+                    if parsed.get("full") is True:
+                        max_bytes = WEB_FETCH_HARD_MAX_BYTES
+                    mb = parsed.get("max_bytes")
+                    if isinstance(mb, int) and mb > 0:
+                        max_bytes = mb
            except json.JSONDecodeError:
                url = ""
        if not url:
@@ -100,7 +110,7 @@ class WebFetchTool:
        loop = asyncio.get_running_loop()
        try:
            result = await asyncio.wait_for(
-                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
+                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10, max_bytes=max_bytes)),
                timeout=30,
            )
        except asyncio.TimeoutError:
@@ -116,8 +126,28 @@ class WebFetchTool:
                return {"error": f"web_fetch: {url}: {err}", "exit_code": 1}
            return {"error": f"web_fetch: {url}: no readable text content (not HTML, or the page needs JS/login)", "exit_code": 1}

+        # Tell the model when the download budget cut the body short and how
+        # to get the rest, instead of silently presenting a partial page as
+        # the whole thing.
+        size_note = ""
+        if result.get("truncated"):
+            fetched = result.get("fetched_bytes") or 0
+            total = result.get("total_bytes")
+            total_txt = f" of {total:,} bytes" if total else ""
+            size_note = (
+                f"[partial content: download stopped at {fetched:,} bytes{total_txt}. "
+                f'Re-call with {{"url": "{url}", "full": true}} to fetch up to '
+                f"{WEB_FETCH_HARD_MAX_BYTES:,} bytes.]\n\n"
+            )
+
+        # The notice must lead the output so the MAX_OUTPUT_CHARS trim below can
+        # never drop it. The title is untrusted, uncapped page content, so a
+        # giant title ahead of the notice could push it out of range; keep the
+        # notice first and cap the title as a second guard.
+        if len(title) > 300:
+            title = title[:300] + "..."
        header = (f"# {title}\n" if title else "") + f"Source: {url}\n\n"
-        output = header + text
+        output = size_note + header + text
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
        return {"output": output, "exit_code": 0}
@@ -1,8 +1,14 @@
 """
 ai_interaction.py

-AI-to-AI interaction tools: chat_with_model, create_session, list_sessions,
-send_to_session, pipeline.
+AI-to-AI interaction tools: pipeline and manage_memory, plus shared model
+resolution (_resolve_model), the session-manager singleton, and dispatch_ai_tool.
+
+As part of the tool -> registry migration (#3629), chat_with_model, ask_teacher
+and list_models moved to src/agent_tools/model_interaction_tools.py, and
+create_session, list_sessions, send_to_session and manage_session moved to
+src/agent_tools/session_tools.py. Those modules reuse get_session_manager /
+_resolve_model / AI_CHAT_TIMEOUT from here.

 These are agent tools — the LLM writes fenced code blocks and they execute
 through the standard agent_tools.py pipeline.
@@ -159,440 +165,6 @@ def _resolve_model(spec: str, owner: Optional[str] = None) -> Tuple[str, str, Di
 # Tool implementations
 # ---------------------------------------------------------------------------

-async def do_chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Send a message to a specific model and return its response.
-
-    Content format:
-      Line 1: model_name (or model_name@endpoint_name)
-      Line 2+: the message to send
-    """
-    from src.llm_core import llm_call_async
-
-    lines = content.strip().split("\n", 1)
-    if not lines or not lines[0].strip():
-        return {"error": "First line must be the model name"}
-
-    model_spec = lines[0].strip()
-    message = lines[1].strip() if len(lines) > 1 else ""
-    if not message:
-        return {"error": "No message provided (line 2+ is the message)"}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    try:
-        response = await llm_call_async(
-            url, model,
-            [{"role": "user", "content": message}],
-            headers=headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        # Truncate very long responses
-        if len(response) > 10000:
-            response = response[:10000] + "\n... (truncated)"
-        return {"model": model, "response": response}
-    except Exception as e:
-        logger.error(f"chat_with_model failed: {e}")
-        return {"error": f"Failed to get response from {model_spec}: {e}"}
-
-
-_TEACHER_SYSTEM_PROMPT = (
-    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
-    "Provide clear, actionable guidance:\n"
-    "1. Brief analysis of the problem\n"
-    "2. Recommended approach (step by step)\n"
-    "3. Key things to watch out for\n\n"
-    "Be concise and practical. No preamble."
-)
-
-
-async def do_ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Ask a more capable model for help.
-
-    Content format:
-      Line 1: model_name (or 'auto')
-      Line 2+: the problem description
-    """
-    from src.llm_core import llm_call_async
-    from src.settings import get_setting
-
-    lines = content.strip().split("\n", 1)
-    model_spec = lines[0].strip() if lines else "auto"
-    problem = lines[1].strip() if len(lines) > 1 else ""
-
-    if not problem:
-        return {"error": "No problem description provided"}
-
-    if model_spec.lower() in ("auto", ""):
-        model_spec = get_setting("teacher_model", "")
-        if not model_spec:
-            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    try:
-        response = await llm_call_async(
-            url, model,
-            [
-                {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
-                {"role": "user", "content": f"Problem:\n{problem}"},
-            ],
-            headers=headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        if len(response) > 8000:
-            response = response[:8000] + "\n... (truncated)"
-        return {"model": model, "response": response, "teacher": True}
-    except Exception as e:
-        logger.error(f"ask_teacher failed: {e}")
-        return {"error": f"Teacher call failed ({model_spec}): {e}"}
-
-
-async def do_second_opinion(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Get a second opinion from another model, then have the original model
-    evaluate the feedback and produce a unified version.
-
-    Content format:
-      Line 1: model_name (or model_name@endpoint_name)
-      Line 2+ (optional): specific question or focus area
-
-    Flow:
-      1. Pull recent conversation context
-      2. Send to reviewer model → get honest feedback
-      3. Send feedback back to the session's own model → evaluate & unify
-      4. Return both the review and the unified response
-    """
-    from src.llm_core import llm_call_async
-
-    lines = content.strip().split("\n", 1)
-    if not lines or not lines[0].strip():
-        return {"error": "First line must be the model name"}
-
-    model_spec = lines[0].strip()
-    focus = lines[1].strip() if len(lines) > 1 else ""
-
-    try:
-        reviewer_url, reviewer_model, reviewer_headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    # Pull recent conversation context from current session
-    context_text = ""
-    sess = None
-    if session_id and _session_manager:
-        sess = _session_manager.get_session(session_id)
-        if sess:
-            messages = sess.get_context_messages()
-            recent = messages[-15:] if len(messages) > 15 else messages
-            parts = []
-            for m in recent:
-                role = m.get("role", "unknown").upper()
-                text = m.get("content", "")
-                if isinstance(text, list):
-                    text = " ".join(
-                        p.get("text", "") for p in text if isinstance(p, dict)
-                    )
-                if text:
-                    parts.append(f"[{role}]: {text[:2000]}")
-            context_text = "\n\n".join(parts)
-
-    if not context_text:
-        return {"error": "No conversation context found to review"}
-
-    # ── Step 1: Get the reviewer's feedback ──
-    reviewer_system = (
-        "You are giving a second opinion on a conversation between a user and an AI assistant. "
-        "Your job is to be genuinely helpful and honest — not a yes-man, but not a contrarian either.\n\n"
-        "Guidelines:\n"
-        "- If the plan/idea is solid, say so clearly. Don't manufacture problems that aren't there.\n"
-        "- If you spot a real flaw, blind spot, or simpler approach — call it out directly.\n"
-        "- Be practical. Don't over-engineer or over-analyze. Real-world tradeoffs matter.\n"
-        "- If there's a meaningfully better way to do something, suggest it concretely.\n"
-        "- Give credit where it's due — highlight what's working well.\n"
-        "- Keep it concise and actionable. No fluff.\n"
-        "- You're a second pair of eyes, not a professor grading a paper."
-    )
-
-    reviewer_message = f"Here's the conversation so far:\n\n{context_text}"
-    if focus:
-        reviewer_message += f"\n\n---\nSpecifically, I want your take on: {focus}"
-    else:
-        reviewer_message += "\n\n---\nGive me your honest second opinion on what's being discussed."
-
-    try:
-        review = await llm_call_async(
-            reviewer_url, reviewer_model,
-            [
-                {"role": "system", "content": reviewer_system},
-                {"role": "user", "content": reviewer_message},
-            ],
-            headers=reviewer_headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-        if len(review) > 8000:
-            review = review[:8000] + "\n... (truncated)"
-    except Exception as e:
-        logger.error(f"second_opinion reviewer call failed: {e}")
-        return {"error": f"Failed to get second opinion from {model_spec}: {e}"}
-
-    # ── Step 2: Send review back to session's own model for evaluation ──
-    unified = ""
-    original_model = "unknown"
-    if sess:
-        original_url = sess.endpoint_url
-        original_model = sess.model
-        original_headers = getattr(sess, "headers", None) or {}
-
-        unify_system = (
-            "Another AI model just reviewed the conversation you've been having with the user. "
-            "Read their feedback carefully, then respond with:\n\n"
-            "1. **What you agree with** — acknowledge valid points honestly.\n"
-            "2. **What you disagree with** — explain why, briefly.\n"
-            "3. **Unified version** — produce an updated/refined version of whatever was being discussed, "
-            "incorporating the feedback you found valid. Don't accept every note blindly — "
-            "use your judgment on what actually improves things vs what's unnecessary.\n\n"
-            "Be concise and practical. The user wants a better result, not a meta-discussion."
-        )
-
-        unify_message = (
-            f"Here's the conversation context:\n\n{context_text}\n\n"
-            f"---\n\n"
-            f"**Review from {reviewer_model}:**\n\n{review}\n\n"
-            f"---\n\n"
-            f"Evaluate this feedback and produce a unified improved version."
-        )
-
-        try:
-            unified = await llm_call_async(
-                original_url, original_model,
-                [
-                    {"role": "system", "content": unify_system},
-                    {"role": "user", "content": unify_message},
-                ],
-                headers=original_headers,
-                timeout=AI_CHAT_TIMEOUT,
-            )
-            if len(unified) > 10000:
-                unified = unified[:10000] + "\n... (truncated)"
-        except Exception as e:
-            logger.error(f"second_opinion unify call failed: {e}")
-            unified = f"(Failed to get unified response: {e})"
-
-    # Build combined result
-    combined = (
-        f"## Second Opinion from {reviewer_model}\n\n{review}"
-        f"\n\n---\n\n"
-        f"## {original_model}'s Response\n\n{unified}"
-    )
-
-    return {
-        "model": reviewer_model,
-        "response": combined,
-        "instruction": "Present these results to the user exactly as they are. Do NOT call second_opinion again. The user can continue the conversation from here.",
-    }
-
-
-async def do_create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Create a new chat session.
-
-    Content format:
-      Line 1: session name
-      Line 2: model_name (or model_name@endpoint_name)
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    lines = content.strip().split("\n")
-    if len(lines) < 2:
-        return {"error": "Need 2 lines: session name, then model spec"}
-
-    name = lines[0].strip()
-    model_spec = lines[1].strip()
-
-    if not name:
-        return {"error": "Session name cannot be empty"}
-
-    try:
-        url, model, headers = _resolve_model(model_spec, owner=owner)
-    except ValueError as e:
-        return {"error": str(e)}
-
-    sid = str(uuid.uuid4())[:8]
-    try:
-        _session_manager.create_session(
-            session_id=sid,
-            name=name,
-            endpoint_url=url,
-            model=model,
-            rag=False,
-            owner=owner,
-        )
-        # Store headers on session for future calls
-        sess = _session_manager.get_session(sid)
-        if sess and headers:
-            sess.headers = headers
-        try:
-            from src.event_bus import fire_event
-            fire_event("session_created", owner)
-        except Exception:
-            logger.debug("session_created event dispatch failed", exc_info=True)
-
-        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
-    except Exception as e:
-        logger.error(f"create_session failed: {e}")
-        return {"error": f"Failed to create session: {e}"}
-
-
-async def do_list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """List sessions sorted by most-recently-active first.
-
-    Output includes a relative "last active" timestamp per row so the
-    agent can answer "open my last chat" without guessing from titles.
-    The most-recent session is always first in the list.
-
-    Content = optional filter keyword (matches session name).
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    keyword = content.strip().lower() if content.strip() else None
-
-    try:
-        from core.database import SessionLocal, Session as DbSession
-        from datetime import datetime, timezone
-
-        # Pull every session's last_accessed from the DB so we can sort
-        # by recency. In-memory sessions hold name + model + msg_count;
-        # the DB row holds the timestamps.
-        db = SessionLocal()
-        try:
-            db_rows = {r.id: r for r in db.query(DbSession).all()}
-        finally:
-            db.close()
-
-        # SECURITY: scope to the caller's sessions. Passing None returned
-        # every user's sessions, which the agent tool then exposed via the
-        # "list my chats" reply.
-        sessions = _session_manager.get_sessions_for_user(owner)
-        rows = []
-        for sid, sess in sessions.items():
-            if keyword and keyword not in (sess.name or "").lower():
-                continue
-            db_row = db_rows.get(sid)
-            # Prefer last_accessed; fall back to updated_at, then created_at.
-            ts = None
-            if db_row:
-                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
-            rows.append((ts, sid, sess))
-
-        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
-        rows.sort(key=lambda r: r[0] or datetime.min, reverse=True)
-
-        def _rel(ts):
-            if not ts:
-                return 'never'
-            now = datetime.utcnow()
-            try:
-                if ts.tzinfo is not None:
-                    now = datetime.now(timezone.utc)
-                diff = (now - ts).total_seconds()
-            except Exception:
-                return 'unknown'
-            if diff < 60: return 'just now'
-            if diff < 3600: return f'{int(diff / 60)}m ago'
-            if diff < 86400: return f'{int(diff / 3600)}h ago'
-            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
-            return ts.strftime('%Y-%m-%d')
-
-        lines = []
-        for i, (ts, sid, sess) in enumerate(rows):
-            if i >= 50:
-                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
-                break
-            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
-            msg_count = getattr(sess, "message_count", 0) or 0
-            model = getattr(sess, "model", "unknown")
-            marker = " ← most recent" if i == 0 else ""
-            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
-
-        if not lines:
-            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
-
-        return {
-            "results": (
-                f"Found {len(rows)} session(s), sorted most-recent first:\n"
-                + "\n".join(lines)
-                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
-            )
-        }
-    except Exception as e:
-        logger.error(f"list_sessions failed: {e}")
-        return {"error": str(e)}
-
-
-async def do_send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Send a message to an existing session and get a response.
-
-    Content format:
-      Line 1: session_id
-      Line 2+: message
-    """
-    from src.llm_core import llm_call_async
-    from core.models import ChatMessage
-
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    lines = content.strip().split("\n", 1)
-    if len(lines) < 2:
-        return {"error": "Need 2 lines: session_id, then message"}
-
-    target_sid = lines[0].strip()
-    message = lines[1].strip()
-
-    sess = _session_manager.get_session(target_sid)
-    if not sess:
-        return {"error": f"Session '{target_sid}' not found"}
-
-    # Owner-scope: reject access to another user's session
-    if owner and getattr(sess, "owner", None) and sess.owner != owner:
-        return {"error": f"Session '{target_sid}' not found"}
-
-    if not message:
-        return {"error": "No message provided"}
-
-    try:
-        # Build context from session history
-        context = sess.get_context_messages()
-        context.append({"role": "user", "content": message})
-
-        response = await llm_call_async(
-            sess.endpoint_url, sess.model, context,
-            headers=sess.headers,
-            timeout=AI_CHAT_TIMEOUT,
-        )
-
-        # Save both messages to session
-        sess.add_message(ChatMessage("user", message))
-        sess.add_message(ChatMessage("assistant", response))
-
-        # Truncate for tool output
-        if len(response) > 10000:
-            response = response[:10000] + "\n... (truncated)"
-
-        return {
-            "session_id": target_sid,
-            "session_name": sess.name,
-            "response": response,
-        }
-    except Exception as e:
-        logger.error(f"send_to_session failed: {e}")
-        return {"error": f"Failed to send to session: {e}"}


 async def stream_ai_tool(tool: str, content: str, session_id: Optional[str] = None, owner: Optional[str] = None):
@@ -715,229 +287,6 @@ async def do_pipeline(content: str, session_id: Optional[str] = None, owner: Opt
 # Session management tool
 # ---------------------------------------------------------------------------

-async def do_manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """Manage sessions: rename, archive, delete, important, truncate, fork.
-
-    Content format:
-      Line 1: action (rename|archive|unarchive|delete|important|unimportant|truncate|fork)
-      Line 2: target session_id (or "current" to use the active session)
-      Line 3+: action-specific params (e.g. new name for rename, keep_count for truncate)
-    """
-    if not _session_manager:
-        return {"error": "Session manager not available"}
-
-    from src.database import SessionLocal, Session as DbSession
-
-    # Accept BOTH the structured JSON args the tool schema advertises
-    # ({action, session_id, value}) AND the legacy line-based format
-    # (line1=action, line2=session_id, line3=value). Native function-calling
-    # models send JSON; fenced-block callers send lines. Previously only the
-    # line format was parsed, so a model that followed the schema (JSON) got
-    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
-    _raw = (content or "").strip()
-    action = ""
-    target_sid = ""
-    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
-    _list_filter = ""
-    _parsed = None
-    if _raw.startswith("{"):
-        try:
-            _parsed = json.loads(_raw)
-        except Exception:
-            _parsed = None
-    if isinstance(_parsed, dict):
-        action = str(_parsed.get("action") or "").strip().lower()
-        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
-        _v = _parsed.get("value")
-        if _v is None:
-            _v = (_parsed.get("name") or _parsed.get("new_name")
-                  or _parsed.get("title") or _parsed.get("keep_count"))
-        value = None if _v is None else str(_v).strip()
-        _list_filter = str(_parsed.get("filter") or "").strip()
-    else:
-        lines = _raw.split("\n")
-        if not lines or not lines[0].strip():
-            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
-        action = lines[0].strip().lower()
-        target_sid = lines[1].strip() if len(lines) >= 2 else ""
-        value = lines[2].strip() if len(lines) >= 3 else None
-        _list_filter = "\n".join(lines[1:]).strip()
-
-    if not action:
-        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
-
-    # `list` alias — dispatch to do_list_sessions so the agent's natural
-    # first guess (every other manage_* tool has a `list` action) works.
-    if action == "list":
-        return await do_list_sessions(_list_filter, session_id, owner=owner)
-
-    if not target_sid:
-        return {"error": "Need a session_id (or 'current' for the active chat)"}
-
-    # Allow "current" to refer to the active session
-    if target_sid.lower() == "current" and session_id:
-        target_sid = session_id
-
-    # `switch` / `open` / `select` / `view` — the agent reaches for
-    # these when the user asks to "open" or "switch to" a session.
-    # There's no server-side way to make the browser navigate, so we
-    # just return a clickable anchor link the user can click. The
-    # frontend's chat-history click delegate routes `#session-<id>`
-    # to selectSession(). The agent's reply naturally embeds this
-    # result so the user sees a single clickable line.
-    def _session_query(db):
-        query = db.query(DbSession).filter(DbSession.id == target_sid)
-        if owner is not None:
-            query = query.filter(DbSession.owner == owner)
-        return query
-
-    if action in ("switch", "open", "select", "view"):
-        db = SessionLocal()
-        try:
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            name = db_sess.name or target_sid
-        finally:
-            db.close()
-        return {
-            "action": action,
-            "session_id": target_sid,
-            "name": name,
-            "results": f"[{name}](#session-{target_sid}) — click to open.",
-        }
-
-    db = SessionLocal()
-    try:
-        if action == "rename":
-            if not value:
-                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
-            new_name = value
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.name = new_name
-            db.commit()
-            _session_manager.update_session_name(target_sid, new_name)
-            return {"action": "rename", "session_id": target_sid, "name": new_name,
-                    "results": f"Session renamed to '{new_name}'"}
-
-        elif action == "archive":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.archived = True
-            db.commit()
-            return {"action": "archive", "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' archived"}
-
-        elif action == "unarchive":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            db_sess.archived = False
-            db.commit()
-            return {"action": "unarchive", "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' unarchived"}
-
-        elif action == "delete":
-            if target_sid == session_id:
-                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
-            if db_sess and db_sess.is_important:
-                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
-            try:
-                ok = _session_manager.delete_session(target_sid)
-                if not ok:
-                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
-                return {"action": "delete", "session_id": target_sid,
-                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
-            except Exception as e:
-                return {"error": f"Failed to delete session: {e}"}
-
-        elif action in ("important", "unimportant"):
-            is_important = action == "important"
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            # Prevent AI from unstarring sessions — only the user can do that manually
-            if not is_important and db_sess.is_important:
-                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
-            db_sess.is_important = is_important
-            db.commit()
-            status = "marked as important" if is_important else "unmarked as important"
-            return {"action": action, "session_id": target_sid,
-                    "results": f"Session '{db_sess.name}' {status}"}
-
-        elif action == "truncate":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            keep_count = 10
-            if value:
-                try:
-                    keep_count = int(value)
-                except ValueError:
-                    pass
-            success = _session_manager.truncate_messages(target_sid, keep_count)
-            if success:
-                return {"action": "truncate", "session_id": target_sid,
-                        "results": f"Session truncated to last {keep_count} messages"}
-            return {"error": f"Failed to truncate session '{target_sid}'"}
-
-        elif action == "fork":
-            db_sess = _session_query(db).first()
-            if not db_sess:
-                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
-            keep_count = 0  # 0 = all messages
-            if value:
-                try:
-                    keep_count = int(value)
-                except ValueError:
-                    pass
-
-            source = _session_manager.get_session(target_sid)
-            if not source:
-                return {"error": f"Session '{target_sid}' not found"}
-
-            new_sid = str(uuid.uuid4())[:8]
-            _session_manager.create_session(
-                session_id=new_sid,
-                name=f"Fork: {source.name}",
-                endpoint_url=source.endpoint_url,
-                model=source.model,
-                rag=False,
-                owner=owner,
-            )
-            # Copy messages
-            history = source.get_context_messages()
-            if keep_count > 0:
-                history = history[:keep_count]
-            from core.models import ChatMessage as InMemoryMsg
-            new_sess = _session_manager.get_session(new_sid)
-            for msg in history:
-                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
-            try:
-                from src.event_bus import fire_event
-                fire_event("session_created", owner)
-            except Exception:
-                logger.debug("session_created event dispatch failed", exc_info=True)
-
-            return {"action": "fork", "session_id": new_sid,
-                    "source_session": target_sid, "messages_copied": len(history),
-                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
-
-        else:
-            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
-    except Exception as e:
-        logger.error(f"manage_session failed: {e}")
-        return {"error": str(e)}
-    finally:
-        db.close()
-
-
 # ---------------------------------------------------------------------------
 # Memory management tool
 # ---------------------------------------------------------------------------
@@ -1104,83 +453,6 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
        return {"error": f"Unknown action '{action}'. Use: list, add, edit, delete, search"}


-# ---------------------------------------------------------------------------
-# List models tool
-# ---------------------------------------------------------------------------
-
-async def do_list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
-    """List all available models across configured endpoints.
-
-    Content = optional filter keyword.
-    """
-    import httpx
-    from src.database import SessionLocal, ModelEndpoint
-    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
-    from src.auth_helpers import owner_filter
-
-    keyword = content.strip().lower() if content.strip() else None
-
-    db = SessionLocal()
-    try:
-        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
-        if owner:
-            query = owner_filter(query, ModelEndpoint, owner)
-        endpoints = query.all()
-        if not endpoints:
-            return {"results": "No enabled model endpoints configured."}
-
-        result_lines = []
-        total_models = 0
-
-        for ep in endpoints:
-            try:
-                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
-            except Exception:
-                continue
-            provider = _detect_provider(base)
-            headers = build_headers(api_key, base)
-
-            model_ids = []
-            if provider == "anthropic":
-                model_ids = list(ANTHROPIC_MODELS)
-            else:
-                try:
-                    models_url = build_models_url(base)
-                    if models_url:
-                        r = httpx.get(models_url, headers=headers, timeout=5)
-                        r.raise_for_status()
-                        data = r.json()
-                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
-                        if not model_ids:
-                            model_ids = [
-                                m.get("name") or m.get("model")
-                                for m in (data.get("models") or [])
-                                if m.get("name") or m.get("model")
-                            ]
-                    else:
-                        model_ids = json.loads(ep.cached_models or "[]")
-                except Exception:
-                    model_ids = ["(endpoint offline)"]
-
-            if keyword:
-                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
-
-            if model_ids:
-                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
-                for mid in model_ids:
-                    result_lines.append(f"  - `{mid}`")
-                    total_models += 1
-
-        if not result_lines:
-            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
-
-        header = f"Available models ({total_models} total):"
-        return {"results": header + "\n".join(result_lines)}
-    except Exception as e:
-        logger.error(f"list_models failed: {e}")
-        return {"error": str(e)}
-    finally:
-        db.close()


 # ---------------------------------------------------------------------------
@@ -1613,7 +885,9 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
    """
    import base64
    import httpx
+    import os
    from pathlib import Path
+    from src.url_safety import check_outbound_url

    lines = content.strip().split("\n")
    prompt = lines[0].strip() if lines else ""
@@ -1779,8 +1053,15 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne

            elif img.get("url"):
                # Download external URL and save locally (DALL-E returns temp URLs)
+                result_url = img["url"]
+                ok, reason = check_outbound_url(
+                    result_url,
+                    block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+                )
+                if not ok:
+                    return {"error": f"Image API returned unsafe image URL: {reason}"}
                try:
-                    dl_resp = httpx.get(img["url"], timeout=60)
+                    dl_resp = httpx.get(result_url, timeout=60)
                    if dl_resp.status_code == 200:
                        img_dir = Path(GENERATED_IMAGES_DIR)
                        img_dir.mkdir(parents=True, exist_ok=True)
@@ -1790,10 +1071,10 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
                        image_url = f"/api/generated-image/{filename}"
                        image_id = _save_to_gallery(filename)
                    else:
-                        image_url = img["url"]  # fallback to external URL
+                        image_url = result_url  # fallback to external URL
                except Exception as _dl_e:
                    logger.warning(f"Failed to download DALL-E image: {_dl_e}")
-                    image_url = img["url"]  # fallback to external URL
+                    image_url = result_url  # fallback to external URL
            else:
                return {"error": "Image API returned unexpected format (no b64_json or url)"}

@@ -1822,55 +1103,20 @@ async def dispatch_ai_tool(
 ) -> Tuple[str, Dict]:
    """Dispatch an AI interaction tool. Returns (description, result_dict)."""

-    if tool == "chat_with_model":
-        model_spec = content.split("\n")[0].strip()[:60]
-        desc = f"chat_with_model: {model_spec}"
-        result = await do_chat_with_model(content, session_id, owner=owner)
-
-    elif tool == "create_session":
-        name = content.split("\n")[0].strip()[:60]
-        desc = f"create_session: {name}"
-        result = await do_create_session(content, session_id, owner=owner)
-
-    elif tool == "list_sessions":
-        keyword = content.strip()[:40]
-        desc = f"list_sessions{': ' + keyword if keyword else ''}"
-        result = await do_list_sessions(content, session_id, owner=owner)
-
-    elif tool == "send_to_session":
-        sid = content.split("\n")[0].strip()[:20]
-        desc = f"send_to_session: {sid}"
-        result = await do_send_to_session(content, session_id, owner=owner)
-
-    elif tool == "pipeline":
+    if tool == "pipeline":
        desc = "pipeline: running steps"
        result = await do_pipeline(content, session_id, owner=owner)

-    elif tool == "manage_session":
-        action = content.split("\n")[0].strip()[:40]
-        desc = f"manage_session: {action}"
-        result = await do_manage_session(content, session_id, owner=owner)
-
    elif tool == "manage_memory":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_memory: {action}"
        result = await do_manage_memory(content, session_id, owner=owner)

-    elif tool == "list_models":
-        keyword = content.strip()[:40]
-        desc = f"list_models{': ' + keyword if keyword else ''}"
-        result = await do_list_models(content, session_id, owner=owner)
-
    elif tool == "ui_control":
        action = content.split("\n")[0].strip()[:60]
        desc = f"ui_control: {action}"
        result = await do_ui_control(content, session_id, owner=owner)

-    elif tool == "ask_teacher":
-        problem = content.split("\n", 1)[-1].strip()[:60]
-        desc = f"ask_teacher: {problem}"
-        result = await do_ask_teacher(content, session_id, owner=owner)
-
    else:
        desc = f"unknown ai tool: {tool}"
        result = {"error": f"Unknown AI interaction tool: {tool}"}
@@ -263,10 +263,32 @@ def list_for_session(session_id: str) -> List[Dict[str, Any]]:
    return [r for r in refresh().values() if r.get("session_id") == session_id]


+def kill(job_id: str) -> Optional[Dict[str, Any]]:
+    """Terminate a running job's process tree and mark it killed. Returns the
+    updated record, or None if the id is unknown. Idempotent: a job that already
+    finished is returned unchanged. Sets followed_up so the monitor does not also
+    fire an auto-continue for a job the agent deliberately stopped."""
+    jobs = _load()
+    rec = jobs.get(job_id)
+    if rec is None:
+        return None
+    # Only a job still marked "running" is touched; a record in any other
+    # status falls through and is returned exactly as loaded.
+    if rec.get("status") == "running":
+        _kill(rec.get("pid"))
+        # A killed job is recorded as failed with exit code -1; the separate
+        # "killed" flag is what result_text() checks to report it as killed.
+        rec["status"] = "failed"
+        rec["exit_code"] = -1
+        rec["ended_at"] = time.time()
+        rec["killed"] = True
+        rec["followed_up"] = True
+        # Written back only on this branch, i.e. only when the record changed.
+        _save(jobs)
+    return rec
+
+
 def result_text(rec: Dict[str, Any]) -> str:
    """Human/agent-readable summary of a finished job, for the follow-up."""
    out = _read_output(rec)
-    if rec.get("timed_out"):
+    if rec.get("killed"):
+        head = "Background job was killed."
+    elif rec.get("timed_out"):
        head = f"Background job timed out after {rec.get('max_runtime_s')}s."
    elif rec.get("died"):
        head = "Background job process died unexpectedly (no exit code)."
@@ -14,6 +14,7 @@ import subprocess
 import sys

 from core.platform_compat import IS_WINDOWS, which_tool
+from src.runtime_paths import get_app_root

 logger = logging.getLogger(__name__)

@@ -81,7 +82,7 @@ _BUILTIN_NPX_SERVERS = {
        "name": "Built-in: Browser",
        "command": "npx",
        "args": ["-y", "@playwright/mcp@latest", "--headless", "--caps", "vision"],
-    },
+    }
 }

 # Global flag to disable MCP if there are compatibility issues
@@ -94,7 +95,7 @@ async def register_builtin_servers(mcp_manager):
        logger.info("Built-in MCP servers disabled via ODYSSEUS_DISABLE_MCP")
        return

-    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    base_dir = get_app_root()
    python = sys.executable

    async def _connect_python_server(server_id: str, script_path: str, name: str):
@@ -5,6 +5,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import Field, field_validator

 from src.constants import DATA_DIR as _DATA_DIR_CONST
+from src.runtime_paths import get_app_root

 # Cross-platform OS flag, exposed here so callers can `from src.config import
 # IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
@@ -19,7 +20,7 @@ IS_WINDOWS = os.name == "nt"
 class DataConfig(BaseSettings):
    """Configuration for data storage and file handling."""
    # Base directory
-    base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
+    base_dir: Path = Field(default=Path(get_app_root()), description="Base directory for the application")
    
    # Data paths
    data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
@@ -138,7 +139,7 @@ class AppConfig(BaseSettings):
        if isinstance(v, dict) and "base_dir" in v:
            base_dir = v["base_dir"]
        else:
-            base_dir = Path(__file__).parent.parent
+            base_dir = Path(get_app_root())
        
        # Convert string paths to Path objects relative to base_dir
        data_dir = Path(_DATA_DIR_CONST)
@@ -2,12 +2,14 @@
 """Application-wide constants and configuration values."""
 import os

+from src.runtime_paths import get_app_root, get_default_data_dir
+
 APP_VERSION = "1.0.1"

 # Base paths
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
+BASE_DIR = os.path.join(get_app_root(), "")
 STATIC_DIR = os.path.join(BASE_DIR, "static")
-DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", os.path.join(BASE_DIR, "data"))
+DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", get_default_data_dir())

 # Data file paths
 # Single source of truth: every persisted file/dir lives under DATA_DIR, which
@@ -55,7 +57,13 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")

 # Paths with an intentional dedicated env override, defaulting under DATA_DIR.
 MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
-FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache"))
+# `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
+# the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
+# which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
+# only returns the default when the var is ABSENT, so the empty string would win →
+# os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
+# init and all vector features (RAG, semantic memory, tool index) silently degrade.
+FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")

 # Agent tool output limits (single source of truth — imported by tool_execution.py,
 # tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -63,11 +71,26 @@ MAX_OUTPUT_CHARS = 10_000       # cap for bash/python/web_search/web_fetch outpu
 MAX_READ_CHARS = 20_000         # cap for read_file / document preview
 MAX_DIFF_LINES = 400            # cap for edit_file unified-diff display

+# web_fetch response-size policy (#3812). MAX_OUTPUT_CHARS above only trims
+# what the agent SEES; these caps bound what the server downloads, parses,
+# and writes to the content cache. The soft cap is the default download
+# budget; the agent can raise it per call (full/max_bytes) but never past
+# the hard cap, so a model can't decide to pull a multi-GB file.
+WEB_FETCH_SOFT_MAX_BYTES = 2_000_000    # default download budget (2 MB)
+WEB_FETCH_HARD_MAX_BYTES = 20_000_000   # absolute ceiling, even with override (20 MB)
+
 # API Configuration
 MAX_CONTEXT_MESSAGES = 90
 REQUEST_TIMEOUT = 20
 OPENAI_COMPAT_PATH = "/v1/chat/completions"

+# Outbound UA for web_fetch / web_search scraping; common desktop UA so pages serve normal HTML.
+WEB_FETCH_USER_AGENT = os.environ.get(
+    "WEB_FETCH_USER_AGENT",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
+)
+
 # Environment variables with defaults
 DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
 LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
@@ -79,6 +102,9 @@ SEARXNG_INSTANCE = os.getenv("SEARXNG_INSTANCE", "http://localhost:8080")
 CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
 CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))

+# Auth policy
+PASSWORD_MIN_LENGTH = 8
+
 # Default parameters
 DEFAULT_TEMPERATURE = 1.0
 DEFAULT_MAX_TOKENS = 0
@@ -161,11 +161,13 @@ async def _tick() -> None:
    # Re-read state once before writing so we capture any updates from
    # concurrent UI syncs.
    stopped_any = False
+    successfully_stopped_sids = set()
    for sid, host, port in to_stop:
        ok = await _stop_serve(sid, host, port)
        logger.info(f"cookbook_serve_lifecycle: stop {sid} (host={host or 'local'}): {'ok' if ok else 'failed'}")
        if ok:
            stopped_any = True
+            successfully_stopped_sids.add(sid)
            # Drop the auto-registered endpoint so the model picker and
            # the chat router don't keep pointing at a dead server.
            for t in tasks:
@@ -188,12 +190,11 @@ async def _tick() -> None:
            except Exception:
                fresh = state
                fresh_tasks = tasks
-            stopped_sids = {sid for sid, _, _ in to_stop}
            for ft in fresh_tasks:
                if not isinstance(ft, dict):
                    continue
                ft_sid = ft.get("sessionId") or ft.get("id")
-                if ft_sid in stopped_sids:
+                if ft_sid in successfully_stopped_sids:
                    ft["status"] = "stopped"
                    ft["_scheduledStopAtMs"] = None
                    ft["_lastStatusFlipAt"] = now_ms
@@ -31,6 +31,8 @@ import numpy as np
 import httpx
 from typing import List, Optional

+from src.runtime_paths import get_app_root
+
 logger = logging.getLogger(__name__)

 _DEFAULT_MODEL = "all-minilm:l6-v2"
@@ -161,6 +161,32 @@ def normalize_base(url: str) -> str:
    return url


+def _validated_endpoint_base(url: str) -> str:
+    """Return a base URL that is safe for endpoint path appends.
+
+    Surrounding whitespace and trailing slashes are stripped; a falsy ``url``
+    is treated as the empty string.
+
+    Raises:
+        ValueError: if the cleaned URL contains "?" or "#", i.e. carries a
+            query string or fragment that a later path append would corrupt.
+    """
+    base = (url or "").strip().rstrip("/")
+    if "?" in base or "#" in base:
+        raise ValueError("Endpoint base URL must not include query or fragment")
+    # Query and fragment were already rejected above; clearing them again on
+    # the urlparse round-trip is defensive.
+    return urlunparse(urlparse(base)._replace(query="", fragment="")).rstrip("/")
+
+
+def _prepare_endpoint_base(base: str) -> str:
+    """Normalize and validate ``base`` both before and after ``resolve_url``.
+
+    The second pass ensures the URL returned by ``resolve_url`` is itself
+    normalized and free of a query string or fragment.
+
+    Raises:
+        ValueError: propagated from ``_validated_endpoint_base`` when either
+            the input or the resolved URL contains a query or fragment.
+    """
+    base = _validated_endpoint_base(normalize_base(base))
+    return _validated_endpoint_base(normalize_base(resolve_url(base)))
+
+
+def _append_endpoint_path(base: str, suffix: str) -> str:
+    """Return ``base`` with ``suffix`` appended to its path component.
+
+    Trailing slashes on the existing path and leading slashes on ``suffix``
+    are collapsed so the two are joined by exactly one "/". Any query string
+    or fragment on ``base`` is dropped from the result.
+    """
+    parsed = urlparse(base)
+    current = (parsed.path or "").rstrip("/")
+    extra = "/" + suffix.lstrip("/")
+    path = f"{current}{extra}" if current else extra
+    return urlunparse(parsed._replace(path=path, query="", fragment=""))
+
+
+def _pathless_host(base: str, host: str) -> bool:
+    """Return True when ``base`` points at ``host`` with no path.
+
+    The path counts as empty when it is missing or consists only of slashes.
+    The parsed hostname is lowercased before comparison while ``host`` is
+    compared as given, so callers should pass ``host`` in lowercase.
+    """
+    parsed = urlparse(base)
+    return (parsed.hostname or "").lower() == host and not (parsed.path or "").strip("/")
+
+
 def _anthropic_api_root(base: str) -> str:
    """Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere."""
    base = (base or "").strip().rstrip("/")
@@ -171,15 +197,17 @@ def _anthropic_api_root(base: str) -> str:

 def build_chat_url(base: str) -> str:
    """Return the correct chat endpoint URL for a given base."""
-    base = resolve_url(base)
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/messages"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/messages")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/chat"
+        return _append_endpoint_path(_ollama_api_root(base), "/chat")
    if provider == "chatgpt-subscription":
-        return base.rstrip("/") + "/responses"
-    return base + "/chat/completions"
+        return _append_endpoint_path(base, "/responses")
+    if _pathless_host(base, "api.openai.com"):
+        base = _append_endpoint_path(base, "/v1")
+    return _append_endpoint_path(base, "/chat/completions")


 def build_models_url(base: str) -> Optional[str]:
@@ -193,12 +221,12 @@ def build_models_url(base: str) -> Optional[str]:
    untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
    their semantics).
    """
-    base = normalize_base(resolve_url(base))
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/models"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/models")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/tags"
+        return _append_endpoint_path(_ollama_api_root(base), "/tags")
    if provider == "chatgpt-subscription":
        return None
    # Generic OpenAI-compatible fallback: local model servers with no explicit
@@ -208,10 +236,10 @@ def build_models_url(base: str) -> Optional[str]:
    parsed = urlparse(base)
    host = (parsed.hostname or "").lower()
    is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
-    uses_v1_models_by_default = is_local or host in {"api.deepseek.com"}
+    uses_v1_models_by_default = is_local or host in {"api.deepseek.com", "api.openai.com"}
    if not parsed.path and uses_v1_models_by_default:
-        base = base + "/v1"
-    return base + "/models"
+        base = _append_endpoint_path(base, "/v1")
+    return _append_endpoint_path(base, "/models")


 def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
@@ -4,6 +4,7 @@ import uuid
 import logging
 import re
 from typing import Dict, List, Optional, Any
+from urllib.parse import urljoin, urlparse, urlunparse

 import httpx
 from fastapi import HTTPException
@@ -202,6 +203,22 @@ def mask_integration_secret(integration: Dict[str, Any]) -> Dict[str, Any]:
    return safe


+def _normalize_integration_base_url(base_url: Any) -> str:
+    """Validate an integration base URL and return it in canonical form.
+
+    The returned URL has surrounding whitespace and trailing slashes removed,
+    a lowercase scheme, and no query string or fragment.
+
+    Raises:
+        ValueError: if ``base_url`` is not a non-blank string, contains "?"
+            or "#", does not use the http/https scheme, or has no hostname.
+    """
+    if not isinstance(base_url, str) or not base_url.strip():
+        raise ValueError("Integration base URL is required")
+    cleaned = base_url.strip().rstrip("/")
+    # Reject rather than silently strip: a query or fragment on the base would
+    # otherwise be lost or mangled once a request path is appended.
+    if "?" in cleaned or "#" in cleaned:
+        raise ValueError("Integration base URL must not include query or fragment")
+    parsed = urlparse(cleaned)
+    if parsed.scheme.lower() not in ("http", "https") or not parsed.hostname:
+        raise ValueError("Integration base URL must be an HTTP(S) URL")
+    return urlunparse(parsed._replace(scheme=parsed.scheme.lower(), query="", fragment="")).rstrip("/")
+
+
+def _join_integration_url(base_url: str, path: str) -> str:
+    """Join a request ``path`` onto an integration ``base_url``.
+
+    A trailing slash is forced onto the base and leading slashes are removed
+    from the path, so ``urljoin`` appends to the base path instead of
+    replacing its last segment or resetting to the host root.
+    """
+    # NOTE(review): urljoin lets `path` override the base when it is itself an
+    # absolute URL (e.g. "https://other.example/x") and resolves ".." segments,
+    # so the result can point outside base_url. Confirm callers validate `path`
+    # or the joined URL before attaching integration credentials.
+    return urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
+
+
 def load_integrations() -> List[Dict[str, Any]]:
    """Load all integrations from disk with secrets decrypted for runtime use."""
    if not os.path.exists(DATA_FILE):
@@ -261,8 +278,10 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:

    if not isinstance(integration.get("name"), str) or not integration["name"].strip():
        raise HTTPException(400, "Integration name is required")
-    if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
-        raise HTTPException(400, "Integration base URL is required")
+    try:
+        integration["base_url"] = _normalize_integration_base_url(integration.get("base_url"))
+    except ValueError as exc:
+        raise HTTPException(400, str(exc)) from exc

    integrations = load_integrations()
    integrations.append(integration)
@@ -272,10 +291,14 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:

 def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Update fields on an existing integration. Returns updated integration or None."""
+    data = dict(data)
    if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
        raise HTTPException(400, "Integration name is required")
-    if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
-        raise HTTPException(400, "Integration base URL is required")
+    if "base_url" in data:
+        try:
+            data["base_url"] = _normalize_integration_base_url(data["base_url"])
+        except ValueError as exc:
+            raise HTTPException(400, str(exc)) from exc

    integrations = load_integrations()
    for item in integrations:
@@ -341,9 +364,10 @@ async def execute_api_call(
    if not integration.get("enabled", True):
        return {"error": f"Integration '{integration.get('name')}' is disabled", "exit_code": 1}

-    base_url = integration.get("base_url", "").rstrip("/")
-    if not base_url:
-        return {"error": "Integration has no base_url configured", "exit_code": 1}
+    try:
+        base_url = _normalize_integration_base_url(integration.get("base_url", ""))
+    except ValueError as exc:
+        return {"error": str(exc), "exit_code": 1}

    # Strip common API path suffixes users might accidentally include
    # (e.g. "http://host/v1/" → "http://host"). The integration's preset
@@ -366,7 +390,10 @@ async def execute_api_call(
    if re.search(r"^https?://", path) or "://" in path:
        return {"error": "Path must not contain a protocol scheme", "exit_code": 1}

-    url = base_url + path
+    if "#" in path:
+        return {"error": "Path must not contain a fragment", "exit_code": 1}
+
+    url = _join_integration_url(base_url, path)
    method = method.upper()

    # Build headers
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}

 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|final)"
+    r"<\|channel\|>(analysis|commentary|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
+    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            out.append((text, self._channel == "analysis"))
+            # analysis + commentary (tool-call preambles / function-arg bodies)
+            # are internal, not user-facing — route them to thinking so they
+            # don't leak into the visible answer; only `final` is visible.
+            out.append((text, self._channel in ("analysis", "commentary")))

    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)
@@ -283,7 +287,8 @@ def _is_ollama_native_url(url: str) -> bool:
    """Return True for native Ollama API URLs, including Ollama Cloud."""
    try:
        parsed = urlparse(url or "")
-    except Exception:
+    except Exception as e:
+        logger.warning("Failed to parse URL for Ollama detection", exc_info=e)
        return False
    host = parsed.hostname or ""
    path = (parsed.path or "").rstrip("/")
@@ -1345,8 +1350,8 @@ def list_model_ids(
                r = httpx.get(root + "/api/tags", timeout=timeout)
                r.raise_for_status()
                return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
-        except Exception:
-            pass
+        except Exception as e:
+            logger.warning("Failed to fetch model list from configured endpoint", exc_info=e)
        return []

 def normalize_model_id(
@@ -11,6 +11,8 @@ import os
 import re
 from typing import Any, Dict, List, Optional, Set, Tuple

+from src.runtime_paths import get_app_root
+
 logger = logging.getLogger(__name__)

 def _format_mcp_connection_error(name: str, command: str = "", args: Optional[List[str]] = None, error: Exception = None) -> str:
@@ -508,7 +510,7 @@ class McpManager:
            return False

        script_rel, name = _BUILTIN_SERVERS[server_id]
-        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        base_dir = get_app_root()
        script_path = os.path.join(base_dir, script_rel)

        # Clean up old connection
@@ -17,10 +17,11 @@ import httpx
 logger = logging.getLogger(__name__)

 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
-_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
-                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
-                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-                     "172.30.", "172.31.", "192.168.")
+# RFC 1918 private IPv4 ranges.
+_PRIVATE_NETWORKS = tuple(
+    ipaddress.ip_network(cidr)
+    for cidr in ("10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16")
+)

 # Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
 # A bare "100." prefix would classify public addresses (e.g. AWS ranges
@@ -36,6 +37,22 @@ def _in_tailscale_range(host: str) -> bool:
        return False


+def _is_private_ip_literal(host: str) -> bool:
+    """Return True when *host* is an IP literal inside _PRIVATE_NETWORKS.
+
+    Anything `ipaddress.ip_address` cannot parse (for example a DNS name)
+    is reported as not private.
+    """
+    try:
+        address = ipaddress.ip_address(host)
+    except ValueError:
+        return False
+    for candidate in _PRIVATE_NETWORKS:
+        if address in candidate:
+            return True
+    return False
+
+
 def _normalize_base_for_compare(url: str) -> str:
    url = (url or "").strip().rstrip("/")
    for suffix in ("/chat/completions", "/models", "/completions", "/v1/messages"):
@@ -87,7 +96,7 @@ def is_local_endpoint(url: str) -> bool:
        return True
    try:
        host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
+        return host in _LOCAL_HOSTS or _is_private_ip_literal(host) or _in_tailscale_range(host)
    except Exception:
        return False

@@ -322,6 +322,58 @@ class PersonalDocsManager:
        else:
            logger.info(f"Directory not in index: {directory}")

+    def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
+        """Repoint tracked directories and excluded files after an owner rename.
+
+        Entries equal to or nested under *old_directory* are moved under
+        *new_directory*; exact entries in *path_map* take precedence. State is
+        saved and the index refreshed only when something actually changed.
+        """
+        src_root = os.path.abspath(old_directory)
+        dst_root = os.path.abspath(new_directory)
+        exact = {os.path.abspath(src): os.path.abspath(dst) for src, dst in (path_map or {}).items()}
+        nested_prefix = src_root + os.sep
+
+        def relocate(entry: str) -> str:
+            """Return the absolute post-rename location of *entry*."""
+            current = os.path.abspath(entry)
+            override = exact.get(current)
+            if override:
+                return override
+            if current == src_root:
+                return dst_root
+            if current.startswith(nested_prefix):
+                return dst_root + current[len(src_root):]
+            return current
+
+        # Directories: keep first-seen order and drop duplicates created by the rewrite.
+        moved_dirs = []
+        dirs_changed = False
+        for entry in self.indexed_directories:
+            target = relocate(entry)
+            if target != os.path.abspath(entry):
+                dirs_changed = True
+            if target not in moved_dirs:
+                moved_dirs.append(target)
+        if dirs_changed:
+            self.indexed_directories = moved_dirs
+            self.save_directories()
+
+        # Excluded files: same rewrite, collected into a set.
+        moved_excluded = set()
+        excluded_changed = False
+        for entry in self.excluded_files:
+            target = relocate(entry)
+            if target != os.path.abspath(entry):
+                excluded_changed = True
+            moved_excluded.add(target)
+        if excluded_changed:
+            self.excluded_files = moved_excluded
+            self._save_excluded()
+
+        if dirs_changed or excluded_changed:
+            self.refresh_index()
+
    def get_indexed_directories(self):
        """Get the list of all indexed directories."""
        return self.indexed_directories.copy()
@@ -7,6 +7,7 @@ import time
 from pathlib import Path

 from src.constants import RAG_DIR
+from src.runtime_paths import get_app_root

 logger = logging.getLogger(__name__)

@@ -50,6 +50,29 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
    return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"


+def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
+    """Translate one stored path to its location after an owner rename.
+
+    An exact hit in *path_map* (looked up by absolute path) wins; otherwise
+    the first (old, new) pair in *path_prefixes* that equals or contains the
+    path is applied. Non-string, empty, and unmatched values come back unchanged.
+    """
+    if not (isinstance(value, str) and value):
+        return value
+    candidate = os.path.abspath(value)
+    exact = path_map.get(candidate)
+    if exact:
+        return exact
+    for source_root, target_root in path_prefixes:
+        source_root = os.path.abspath(source_root)
+        target_root = os.path.abspath(target_root)
+        if candidate == source_root:
+            return target_root
+        if candidate.startswith(source_root + os.sep):
+            return target_root + candidate[len(source_root):]
+    return value
+
+
 class VectorRAG:
    """RAG system using ChromaDB vector storage with hybrid search."""

@@ -250,6 +267,95 @@ class VectorRAG:
            "failed_count": len(docs) - len(valid),
        }

+    def rename_owner(
+        self,
+        old_owner: str,
+        new_owner: str,
+        *,
+        path_map: Optional[Dict[str, str]] = None,
+        path_prefixes: Optional[List[tuple]] = None,
+    ) -> Dict[str, Any]:
+        """Rewrite existing RAG metadata after an auth username rename.
+
+        Every chunk whose `owner` metadata equals *old_owner* gets its owner
+        set to *new_owner* and its `source` / `directory` paths passed through
+        `_rewrite_owner_path`. Lanes that fail to scan or update are logged,
+        counted, and skipped rather than aborting the rename.
+
+        Args:
+            old_owner: Username being replaced (stripped and lowercased).
+            new_owner: Replacement username (stripped and lowercased).
+            path_map: Exact old-path -> new-path overrides.
+            path_prefixes: (old_prefix, new_prefix) pairs for other paths.
+
+        Returns:
+            Dict with `success`, `updated_count`, `message` and, once the
+            lanes have been scanned, `failed_count`.
+        """
+        if not self.healthy:
+            return {"success": False, "updated_count": 0, "message": "Collection not initialized"}
+
+        old_owner = (old_owner or "").strip().lower()
+        new_owner = (new_owner or "").strip().lower()
+        if not old_owner or not new_owner or old_owner == new_owner:
+            return {"success": True, "updated_count": 0, "message": "No owner rename needed"}
+
+        path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
+        path_prefixes = path_prefixes or []
+        updated_ids = set()
+        failed_count = 0
+
+        for lane_name, collection in self._collections_for_delete():
+            try:
+                results = collection.get(
+                    where={"owner": old_owner},
+                    include=["metadatas"],
+                )
+            except Exception as e:
+                logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
+                failed_count += 1
+                continue
+
+            ids = results.get("ids") or []
+            metadatas = results.get("metadatas") or []
+            if not ids:
+                continue
+
+            new_metas = []
+            selected_ids = []
+            for doc_id, meta in zip(ids, metadatas):
+                if not isinstance(meta, dict):
+                    continue
+                next_meta = dict(meta)
+                if str(next_meta.get("owner", "")).strip().lower() == old_owner:
+                    next_meta["owner"] = new_owner
+                for key in ("source", "directory"):
+                    # Only rewrite paths the chunk actually carries: storing
+                    # the helper's pass-through None for an absent key would
+                    # add a None-valued entry to metadata that never had one.
+                    if key in next_meta:
+                        next_meta[key] = _rewrite_owner_path(next_meta[key], path_map, path_prefixes)
+                selected_ids.append(doc_id)
+                new_metas.append(next_meta)
+
+            if not selected_ids:
+                continue
+
+            try:
+                collection.update(ids=selected_ids, metadatas=new_metas)
+                updated_ids.update(selected_ids)
+            except Exception as e:
+                logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
+                failed_count += len(selected_ids)
+
+        success = failed_count == 0
+        return {
+            "success": success,
+            "updated_count": len(updated_ids),
+            "failed_count": failed_count,
+            "message": f"Updated {len(updated_ids)} RAG chunk(s)",
+        }
+
    # ------------------------------------------------------------------
    # Search — hybrid: vector similarity + keyword overlap
    # ------------------------------------------------------------------
@@ -0,0 +1,31 @@
+"""Helpers for resolving runtime paths in source and frozen builds."""
+
+import os
+import sys
+
+
+def get_app_root() -> str:
+    """Return the directory that holds the app's bundled runtime folders.
+
+    Source runs: the parent of this module's directory (the repository
+    root). Frozen builds: `sys._MEIPASS` when it is set, otherwise the
+    directory containing the executable, so `static/`, `scripts/`, and
+    `data/` stay together with the executable payload.
+    """
+    if not getattr(sys, "frozen", False):
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        return os.path.dirname(module_dir)
+    executable_dir = os.path.dirname(os.path.abspath(sys.executable))
+    return getattr(sys, "_MEIPASS", executable_dir)
+
+
+def get_default_data_dir() -> str:
+    """Return the default location of the data directory.
+
+    Source runs use `data/` under the app root. Frozen builds use
+    `~/.odysseus/data` so SQLite databases and other persistent files are not
+    written into the temporary extraction directory.
+    """
+    if getattr(sys, "frozen", False):
+        return os.path.join(os.path.expanduser("~"), ".odysseus", "data")
+    return os.path.join(get_app_root(), "data")
@@ -9,6 +9,8 @@ import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any, Awaitable, Callable, Dict, Tuple

+from core.auth import RESERVED_USERNAMES
+
 logger = logging.getLogger(__name__)


@@ -17,6 +19,35 @@ def _utcnow() -> datetime:
    return datetime.now(timezone.utc).replace(tzinfo=None)


+# Shell/file tools a scheduled task's agent should be offered by default,
+# mirroring the chat agent (where these are on unless a privilege or global
+# setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
+# never include bash/python, so on a host with an empty/degraded tool-embedding
+# index a task could not run shell or Python even for an admin owner. Offering
+# them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
+# this whole group for non-admin multi-user owners, and only admits it for
+# admins and single-user (AUTH_ENABLED=false) deployments.
+TASK_DEFAULT_SHELL_TOOLS = frozenset({
+    "bash", "python", "read_file", "write_file", "edit_file",
+    "grep", "glob", "ls", "get_workspace",
+})
+
+
+def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
+    """Build the relevant-tools set offered to a scheduled task's agent.
+
+    Starts from the RAG-retrieved tools, adds the assistant's always-available
+    set and the default shell/file group, then drops every name in
+    *disabled_tools* (skipped when that argument is empty or None). Per-owner
+    admin gating is applied later by stream_agent_loop (blocked_tools_for_owner).
+    """
+    offered = set(rag_tools)
+    offered.update(assistant_always, TASK_DEFAULT_SHELL_TOOLS)
+    if not disabled_tools:
+        return offered
+    return offered.difference(disabled_tools)
+
+
 # ── Shared TTL cache (singleflight) ────────────────────────────────────────
 # Multiple scheduled tasks firing in the same minute often need the same
 # external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -236,6 +266,27 @@ def _digest_windows(now):
    ]


+def _checkin_calendar_events(db, owner, start, end):
+    """Return ONE owner's calendar events for the check-in digest.
+
+    Selects events with dtstart in [start, end] whose status is not
+    "cancelled", ordered by dtstart. Ownership lives on CalendarCal.owner
+    and events inherit it via calendar_id, so the query joins CalendarCal
+    and filters on its owner. Without that scope the digest pulled EVERY
+    user's events into one user's check-in (a cross-tenant leak of
+    summaries/locations). Mirrors routes/calendar_routes.list_events.
+    """
+    from core.database import CalendarEvent as _CE, CalendarCal as _CC
+    owned_in_window = (
+        _CC.owner == owner,
+        _CE.dtstart >= start,
+        _CE.dtstart <= end,
+        _CE.status != "cancelled",
+    )
+    events = db.query(_CE).join(_CC, _CE.calendar_id == _CC.id)
+    return events.filter(*owned_in_window).order_by(_CE.dtstart).all()
+
+
 class TaskScheduler:
    def __init__(self, session_manager):
        self._session_manager = session_manager
@@ -1135,11 +1188,7 @@ class TaskScheduler:
                    # Strip timezone for naive DB comparison
                    _s = start.replace(tzinfo=None) if start.tzinfo else start
                    _e = end.replace(tzinfo=None) if end.tzinfo else end
-                    evs = _db.query(_CE).filter(
-                        _CE.dtstart >= _s,
-                        _CE.dtstart <= _e,
-                        _CE.status != "cancelled",
-                    ).order_by(_CE.dtstart).all()
+                    evs = _checkin_calendar_events(_db, task.owner, _s, _e)
                    if not evs:
                        continue
                    # Group by importance for richer output
@@ -1378,17 +1427,30 @@ class TaskScheduler:
            time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
        system_prompt = f"Current time: {time_str}\n\n{system_prompt}"

-        # Compute tool filter from CrewMember.enabled_tools if set
-        disabled_tools = None
+        # Compute the disabled-tools set: the crew's enabled_tools allowlist
+        # (inverted) plus the operator's global disabled_tools setting. The
+        # global list must be merged here — chat does the same merge before
+        # entering the agent loop (routes/chat_routes.py) — otherwise an admin
+        # or AUTH_ENABLED=false scheduled task would still see and call shell/
+        # file tools after the operator disabled them globally, because the
+        # prompt/schema/execution gates only enforce what is passed in.
+        disabled_tools: set[str] = set()
        if crew and crew.enabled_tools:
            try:
                enabled = json.loads(crew.enabled_tools)
                if isinstance(enabled, list) and enabled:
                    from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
                    all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
-                    disabled_tools = all_tools - set(enabled)
+                    disabled_tools |= all_tools - set(enabled)
            except Exception:
                pass
+        try:
+            from src.settings import get_setting
+            _global_disabled = get_setting("disabled_tools", [])
+            if isinstance(_global_disabled, list):
+                disabled_tools.update(_global_disabled)
+        except Exception:
+            pass

        # RAG-select relevant tools for this prompt + always-available assistant tools.
        # Without this, all 40+ tools get sent and models hit their tool limit.
@@ -1398,10 +1460,10 @@ class TaskScheduler:
            tool_idx = get_tool_index()
            if tool_idx:
                rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
-                relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
-                if disabled_tools:
-                    relevant_tools -= disabled_tools
-                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
+                relevant_tools = compose_task_relevant_tools(
+                    rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
+                )
+                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
        except Exception as e:
            logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")

@@ -1409,7 +1471,7 @@ class TaskScheduler:
        try:
            result = await self._run_agent_loop(
                endpoint_url, model, task, session_id,
-                system_prompt=system_prompt, disabled_tools=disabled_tools,
+                system_prompt=system_prompt, disabled_tools=disabled_tools or None,
                relevant_tools=relevant_tools,
            )
        except Exception as e:
@@ -2221,7 +2283,7 @@ class TaskScheduler:
        # check-ins seeded, which then double-fire alongside the human user's
        # check-ins. This was the root cause of the duplicate 'Morning check-in'
        # rows we had to manually clean up.
-        if not owner or owner in {"internal-tool", "api", "demo", "system"}:
+        if not owner or owner in RESERVED_USERNAMES:
            logger.info(f"ensure_assistant_defaults: skip synthetic owner {owner!r}")
            return
        from core.database import SessionLocal, CrewMember, ScheduledTask
@@ -323,6 +323,29 @@ _MCP_TOOL_MAP = {
    "web_fetch":      ("web_fetch",  "web_fetch"),
    "generate_image": ("image_gen",  "generate_image"),
 }
+_EMAIL_MCP_OWNER_ARG = "_odysseus_owner"
+
+
+def _parse_qualified_mcp_args(tool: str, content: str) -> tuple[Dict, Optional[str]]:
+    """Decode the JSON argument payload of a qualified MCP tool call.
+
+    Returns `(args, error)`. Blank content gives `({}, None)`. A payload that
+    is not valid JSON, or decodes to something other than an object, gives
+    empty args; for `mcp__email__` tools it additionally gives an error
+    message, while every other tool gets `None`.
+    """
+    payload = (content or "").strip()
+    if not payload:
+        return {}, None
+    try:
+        decoded = json.loads(payload)
+    except (json.JSONDecodeError, TypeError):
+        decoded = None
+    if isinstance(decoded, dict):
+        return decoded, None
+    if tool.startswith("mcp__email__"):
+        return {}, "Email MCP tool arguments must be a JSON object."
+    return {}, None


 def _parse_generate_image(content: str) -> Dict:
@@ -453,6 +471,8 @@ async def _direct_fallback(
    tool: str,
    content: str,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
+    session_id: Optional[str] = None,
+    owner: Optional[str] = None,
 ) -> Optional[Dict]:
    _subproc_env = {
        **os.environ,
@@ -466,6 +486,8 @@ async def _direct_fallback(
        ctx = {
            "progress_cb": progress_cb,
            "subproc_env": _subproc_env,
+            "session_id": session_id,
+            "owner": owner,
        }

        from src.agent_tools import TOOL_HANDLERS
@@ -713,10 +735,13 @@ async def _execute_tool_block_impl(
            desc = f"bash (background): {short}"
            result = {
                "output": (
-                    f"Started background job `{rec['id']}`. It is running detached — "
+                    f"Started background job `{rec['id']}`. It is running detached; "
                    f"do NOT wait for it or poll it. You will be automatically re-invoked "
                    f"with its full output when it finishes. Continue with other work, or "
-                    f"end your turn now and resume when the result arrives."
+                    f"end your turn now and resume when the result arrives. If the user "
+                    f"later asks to check progress or stop it, call the manage_bg_jobs "
+                    f"tool yourself (output or kill); do not tell them to run a tool "
+                    f"command, and do not surface raw tool syntax in your reply."
                ),
                "exit_code": 0,
                "bg_job_id": rec["id"],
@@ -737,6 +762,11 @@ async def _execute_tool_block_impl(
        desc = f"{tool}: {first_line}"
        result = await _direct_fallback(tool, content, progress_cb=progress_cb) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool == "manage_bg_jobs":
+        # Inspect/kill detached `bash` jobs; needs session_id to scope to chat.
+        desc = f"manage_bg_jobs: {content.split(chr(10))[0][:80]}"
+        result = await _direct_fallback(tool, content, session_id=session_id, owner=owner) \
+            or {"error": "manage_bg_jobs: execution failed", "exit_code": 1}
    elif tool in ("create_document", "update_document", "edit_document",
                  "suggest_document", "manage_documents"):
        desc = f"{tool}: {content.split(chr(10))[0][:80]}"
@@ -748,10 +778,24 @@ async def _execute_tool_block_impl(
        query = content.split("\n")[0].strip()
        desc = f"search_chats: {query[:80]}"
        result = await do_search_chats(query, owner=owner)
-    elif tool in ("chat_with_model", "create_session", "list_sessions",
-                  "send_to_session", "pipeline",
-                  "manage_session", "manage_memory", "list_models",
-                  "ui_control", "ask_teacher"):
+    elif tool in ("chat_with_model", "ask_teacher", "list_models"):
+        # Migrated to the agent_tools registry (#3629): dispatched through
+        # TOOL_HANDLERS with the owner/session ctx these tools need, instead
+        # of the legacy dispatch_ai_tool elif. The impls live in
+        # src/agent_tools/model_interaction_tools.py.
+        first_line = content.split(chr(10))[0].strip()[:60]
+        desc = f"{tool}: {first_line}" if first_line else tool
+        result = await _document_tool_dispatch(tool, content, session_id, owner) \
+            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool in ("create_session", "list_sessions", "send_to_session", "manage_session"):
+        # Migrated to the agent_tools registry (#3629): dispatched through
+        # TOOL_HANDLERS with the owner/session ctx these tools need. The impls
+        # live in src/agent_tools/session_tools.py.
+        first_line = content.split(chr(10))[0].strip()[:60]
+        desc = f"{tool}: {first_line}" if first_line else tool
+        result = await _document_tool_dispatch(tool, content, session_id, owner) \
+            or {"error": f"{tool}: execution failed", "exit_code": 1}
+    elif tool in ("pipeline", "manage_memory", "ui_control"):
        from src.ai_interaction import dispatch_ai_tool
        desc, result = await dispatch_ai_tool(tool, content, session_id, owner=owner)
    elif tool == "manage_tasks":
@@ -858,12 +902,15 @@ async def _execute_tool_block_impl(
        # MCP tool dispatch
        mcp = get_mcp_manager()
        if mcp:
-            try:
-                args = json.loads(content) if content.strip().startswith("{") else {}
-            except (json.JSONDecodeError, TypeError):
-                args = {}
            desc = f"mcp: {tool}"
-            result = await mcp.call_tool(tool, args)
+            args, parse_error = _parse_qualified_mcp_args(tool, content)
+            if parse_error:
+                result = {"error": parse_error, "exit_code": 1}
+            else:
+                if tool.startswith("mcp__email__") and owner:
+                    args = dict(args)
+                    args[_EMAIL_MCP_OWNER_ARG] = owner
+                result = await mcp.call_tool(tool, args)
        else:
            desc = f"mcp: {tool}"
            result = {"error": "MCP manager not available", "exit_code": 1}
@@ -12,12 +12,33 @@ import os
 import re
 from typing import Any, Dict, List, Optional

+from fastapi import HTTPException
 from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
 from src.tool_utils import get_mcp_manager
 from core.constants import internal_api_base
+from routes._validators import validate_remote_host, validate_ssh_port

 logger = logging.getLogger(__name__)

+
+def _string_arg(value: Any) -> str:
+    """Return *value* as a stripped string, mapping None to ""."""
+    if value is None:
+        return ""
+    return str(value).strip()
+
+
+def _validate_cookbook_ssh_target(remote_host: Any, ssh_port: Any = "") -> tuple[str, str]:
+    """Validate an SSH host/port pair and return both as strings.
+
+    Each argument is stringified and stripped; a blank value is handed to its
+    validator as None, and a falsy validator result is returned as "".
+    Whatever the validators raise propagates to the caller.
+    """
+    remote = validate_remote_host(_string_arg(remote_host) or None)
+    sport = validate_ssh_port(_string_arg(ssh_port) or None)
+    return remote or "", sport or ""
+
 # ---------------------------------------------------------------------------
 # Active email state
 # ---------------------------------------------------------------------------
@@ -645,6 +657,137 @@ async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict
 # MCP server management tool
 # ---------------------------------------------------------------------------

+# Parallel to routes/cookbook_helpers._validate_serve_cmd but deliberately the
+# opposite policy: that gate guards an admin-only serve command and allows
+# interpreters (python3/etc) because model-serving needs them, whereas this is
+# the model/prompt-injection-reachable manage_mcp path, so interpreters and
+# runners are denied here.
+#
+# Commands that can execute arbitrary code regardless of their arguments. These
+# are NEVER accepted on the manage_mcp agent path, even if an operator lists one
+# in ODYSSEUS_MCP_ALLOWED_COMMANDS -- a stdio server that genuinely needs an
+# interpreter or package runner must be registered via the trusted admin route.
+#
+# Entries must be lowercase family names: _validate_mcp_command lowercases the
+# command, drops a trailing .exe/.cmd/.bat, and also tests the name with any
+# trailing version suffix removed (python3.11 -> python) against this set.
+_MCP_DENIED_COMMANDS = frozenset({
+    "sh", "bash", "zsh", "fish", "dash", "ksh", "csh", "tcsh", "ash", "busybox",
+    "cmd", "command.com", "powershell", "pwsh",
+    "python", "pypy", "node", "nodejs", "deno", "bun", "ruby", "jruby",
+    "perl", "raku", "php", "lua", "luajit", "tclsh", "wish", "expect", "rscript",
+    "groovy", "scala", "elixir", "erl", "iex", "java", "javac", "jshell", "jbang",
+    "kotlin", "kotlinc", "dotnet", "mono", "swift", "osascript", "tsx", "ts-node",
+    "npx", "bunx", "uvx", "pipx", "npm", "pnpm", "yarn", "pip", "uv",
+    "gem", "cargo", "go", "bundle", "poetry", "conda", "mamba", "brew",
+    "apt", "apt-get", "yum", "dnf", "pacman", "apk",
+    "env", "xargs", "nohup", "setsid", "nice", "ionice", "time", "timeout",
+    "watch", "stdbuf", "unbuffer", "script", "ssh", "scp", "sshpass", "sudo",
+    "doas", "su", "make", "cmake", "docker", "podman", "kubectl", "find",
+    "awk", "gawk", "sed", "vi", "vim", "nvim", "emacs", "ed", "tee", "eval",
+})
+
+# Argv flags that make even an allowlisted binary execute inline code.
+# Short flags are matched case-sensitively as a PREFIX of the stripped
+# argument, so glued forms (-cimport os) are caught -- and so is any other
+# argument that merely begins with -c, -e or -m. Long flags are matched
+# case-insensitively, either exactly or when followed by "=" (--eval=...).
+_MCP_CODE_EXEC_SHORT_FLAGS = ("-c", "-e", "-m")
+_MCP_CODE_EXEC_LONG_FLAGS = ("--eval", "--exec", "--print", "--module", "--command", "--require")
+
+# Argument prefixes treated as URLs. Compared against the lowercased, stripped
+# argument; any argument starting with one of these is rejected.
+_MCP_URL_SCHEMES = ("http://", "https://", "ftp://", "ftps://", "file://", "data:", "jar:", "blob:")
+
+# Shell metacharacters refused in command/args. Args are passed as an argv list
+# (no shell), but refusing these keeps the surface narrow and obvious.
+_MCP_SHELL_METACHARS = set(";|&$`><\n\r")
+
+# Env vars that let a child process load attacker-supplied code before main().
+# Names are stored uppercase; supplied keys are stripped and uppercased before
+# the membership test.
+_MCP_DANGEROUS_ENV = frozenset({
+    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "DYLD_INSERT_LIBRARIES",
+    "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH", "PYTHONPATH", "PYTHONSTARTUP",
+    "PYTHONHOME", "PYTHONEXECUTABLE", "NODE_OPTIONS", "NODE_PATH", "BASH_ENV",
+    "ENV", "SHELLOPTS", "PERL5LIB", "PERL5OPT", "RUBYOPT", "RUBYLIB", "GEM_PATH",
+    "R_PROFILE", "R_HOME", "PATH", "IFS", "PROMPT_COMMAND",
+})
+
+
+def _mcp_allowed_commands() -> set:
+    """Return the operator's allowlist of MCP launcher names for the agent path.
+
+    Reads the comma-separated ODYSSEUS_MCP_ALLOWED_COMMANDS environment
+    variable on every call. Entries are stripped and lowercased, and blank
+    entries are dropped. An unset or empty variable yields an empty set, so
+    nothing is allowed by default. Being listed here does not override the
+    deny list applied by _validate_mcp_command.
+    """
+    configured = os.environ.get("ODYSSEUS_MCP_ALLOWED_COMMANDS", "")
+    allowed = set()
+    for entry in configured.split(","):
+        name = entry.strip()
+        if name:
+            allowed.add(name.lower())
+    return allowed
+
+
+def _validate_mcp_command(command, args, env) -> Optional[str]:
+    """Vet a stdio MCP server registration that originated from the model.
+
+    Guards the manage_mcp 'add' path against the RCE tracked as issue #438:
+    prompt-injection-controlled command/args/env (smuggled in via a skill
+    description, memory entry, fetched page, or email body) used to reach a
+    subprocess spawn and could run arbitrary code as the app UID.
+
+    Checks run in a fixed order -- command, then args, then env -- and the
+    first failure is the one reported.
+
+    Args:
+        command: Launcher name. Must be a bare executable name that is not on
+            the deny list and is present in the operator allowlist.
+        args: ``None``, a list of strings, or a JSON string encoding one.
+        env: A falsy value, a mapping, or a JSON string encoding one.
+
+    Returns:
+        A rejection reason, or ``None`` when the registration is acceptable.
+    """
+    # --- command ---------------------------------------------------------
+    if not isinstance(command, str):
+        return "command must be a non-empty string"
+    command = command.strip()
+    if not command:
+        return "command must be a non-empty string"
+    if any(sep in command for sep in ("/", "\\")):
+        return "command must be a bare executable name, not a path"
+    if not _MCP_SHELL_METACHARS.isdisjoint(command):
+        return "command contains shell metacharacters"
+
+    launcher = command.lower()
+    if launcher.endswith((".exe", ".cmd", ".bat")):
+        launcher = launcher.rsplit(".", 1)[0]
+    # Collapse versioned aliases onto their family name (python3.11 -> python,
+    # node18 -> node, pip3 -> pip). Both spellings are tested against the deny
+    # list so an allowlisted runtime alias cannot slip back in.
+    family = re.sub(r"[-_.]?\d+(?:\.\d+)*$", "", launcher)
+    if not _MCP_DENIED_COMMANDS.isdisjoint((launcher, family)):
+        return (
+            f"command '{command}' is not allowed on the agent MCP path: "
+            "interpreters, runtimes, package runners, and shells can execute "
+            "arbitrary code. Register such a server via the admin route instead."
+        )
+    if launcher not in _mcp_allowed_commands():
+        return (
+            f"command '{command}' is not in the MCP allowlist. Add it to "
+            "ODYSSEUS_MCP_ALLOWED_COMMANDS if you trust it, or register the "
+            "server via the admin route."
+        )
+
+    # --- args ------------------------------------------------------------
+    if args is not None:
+        if isinstance(args, str):
+            try:
+                args = json.loads(args)
+            except Exception:
+                return "args must be a JSON list"
+        if not isinstance(args, list):
+            return "args must be a list"
+        for arg in args:
+            if not isinstance(arg, str):
+                return "args must all be strings"
+            trimmed = arg.strip()
+            lowered = trimmed.lower()
+            # Short flags: case-sensitive prefix match (catches -cimport os).
+            is_short_flag = trimmed.startswith(_MCP_CODE_EXEC_SHORT_FLAGS)
+            # Long flags: the text before the first "=" must be a listed flag,
+            # which covers both the bare form and the --flag=value form.
+            is_long_flag = lowered.partition("=")[0] in _MCP_CODE_EXEC_LONG_FLAGS
+            if is_short_flag or is_long_flag:
+                return f"arg '{arg}' is a code-execution flag and is not allowed"
+            if lowered.startswith(_MCP_URL_SCHEMES):
+                return f"arg '{arg}' is a remote URL and is not allowed"
+            # Metacharacters are checked on the raw argument, not the stripped
+            # one, so a leading/trailing newline is still refused.
+            if not _MCP_SHELL_METACHARS.isdisjoint(arg):
+                return f"arg '{arg}' contains shell metacharacters"
+
+    # --- env -------------------------------------------------------------
+    if not env:
+        return None
+    if isinstance(env, str):
+        try:
+            env = json.loads(env)
+        except Exception:
+            return "env must be a JSON object"
+    if not isinstance(env, dict):
+        return "env must be an object"
+    for key in env:
+        if str(key).strip().upper() in _MCP_DANGEROUS_ENV:
+            return f"env var '{key}' can inject code into the child process and is not allowed"
+
+    return None
+
+
 async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
    """Manage MCP servers: list, add, delete, enable, disable, reconnect."""
    try:
@@ -684,6 +827,12 @@ async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
        env = args.get("env", {})
        if not name or not command:
            return {"error": "name and command are required", "exit_code": 1}
+        # Validate BEFORE any DB write or spawn: a rejected registration must
+        # leave no enabled row (which would otherwise auto-reconnect on restart)
+        # and must not attempt a connection.
+        _mcp_err = _validate_mcp_command(command, cmd_args, env)
+        if _mcp_err:
+            return {"error": f"manage_mcp: refused unsafe server registration: {_mcp_err}", "exit_code": 1}
        sid = str(_uuid.uuid4())[:8]
        db = SessionLocal()
        try:
@@ -1579,10 +1728,10 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
        text = str(raw).strip().lower()
        if text in {"none", "no", "off", "false"}:
            return None
-        m = re.search(r"(\d+)\s*(?:m|min|minute|minutes)\b", text)
+        m = re.search(r"(\d+)\s*(?:minutes?|mins?|m)\b", text)
        if m:
            return max(0, int(m.group(1)))
-        m = re.search(r"(\d+)\s*(?:h|hr|hour|hours)\b", text)
+        m = re.search(r"(\d+)\s*(?:hours?|hrs?|h)\b", text)
        if m:
            return max(0, int(m.group(1)) * 60)
        if text.isdigit():
@@ -1595,7 +1744,7 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
            return desc
        reminder_only = re.compile(
            r"^\s*(?:remind(?:er)?|alarm)\s*:?\s*\d+\s*"
-            r"(?:m|min|minute|minutes|h|hr|hour|hours)\b.*$",
+            r"(?:minutes?|mins?|m|hours?|hrs?|h)\b.*$",
            re.I,
        )
        return "" if reminder_only.match(desc) else desc
@@ -2900,6 +3049,10 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "",
            break

    if remote:
+        try:
+            remote, sport = _validate_cookbook_ssh_target(remote, sport)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
        _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
        cmd = (
            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
@@ -2988,8 +3141,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        tail = 400
    tail = max(20, min(tail, 4000))
    headers = _internal_headers()
-    remote = (args.get("remote_host") or args.get("host") or "").strip()
-    sport = (args.get("ssh_port") or "").strip()
+    remote = _string_arg(args.get("remote_host") or args.get("host"))
+    sport = _string_arg(args.get("ssh_port"))
    # Resolve host from cookbook state if caller didn't pass one — same
    # lookup _cookbook_kill_session uses.
    if not remote:
@@ -3007,6 +3160,12 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
                    if not sport:
                        sport = t.get("sshPort") or ""
                    break
+    if remote:
+        try:
+            remote, sport = _validate_cookbook_ssh_target(remote, sport)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
+
    # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
    # live tmux pane. The pane is what the user would see scrolling on
    # their screen — including the post-crash neofetch banner and the
@@ -3193,7 +3352,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}

-    host = (args.get("host") or args.get("remote_host") or "").strip()
+    host = _string_arg(args.get("host") or args.get("remote_host"))
    sess = (args.get("tmux_session") or args.get("session_id") or "").strip()
    model = (args.get("model") or args.get("repo_id") or "").strip()
    port = args.get("port") or 8000
@@ -3204,6 +3363,12 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
        return {"error": "tmux_session and model are required", "exit_code": 1}

    # Verify tmux session exists on the target host
+    if host:
+        try:
+            host, _ = _validate_cookbook_ssh_target(host)
+        except HTTPException as e:
+            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
+
    headers = _internal_headers()
    if host:
        check = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)} 2>&1'"
@@ -3818,7 +3983,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
    if not name:
        return {"error": "name is required", "exit_code": 1}

-    contacts = {}  # email -> {name, source}
+    contacts = {}  # email_or_phone -> {name, source, phone?}

    # 1. CardDAV (Radicale) — structured contacts. Call in-process: a
    # server-side httpx GET to /api/contacts/search carries no session
@@ -3833,10 +3998,18 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
            match = q in hay_name or any(q in (e or "").lower() for e in c.get("emails", []))
            if not match:
                continue
+            has_email = False
            for email in (c.get("emails") or []):
                email = (email or "").strip().lower()
                if email and "@" in email:
                    contacts[email] = {"name": c.get("name") or email, "source": "contacts"}
+                    has_email = True
+            # Fall back to phone numbers when the contact has no email address
+            if not has_email:
+                for phone in (c.get("phones") or []):
+                    phone = (phone or "").strip()
+                    if phone:
+                        contacts[phone] = {"name": c.get("name") or phone, "source": "contacts", "phone": phone}
    except Exception:
        pass

@@ -3856,8 +4029,11 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
        return {"output": f"No contacts found matching '{name}'.", "exit_code": 0}

    lines = [f"Contacts matching '{name}':"]
-    for email, info in contacts.items():
-        lines.append(f"- {info['name']} <{email}> ({info['source']})")
+    for key, info in contacts.items():
+        if info.get("phone"):
+            lines.append(f"- {info['name']} — phone: {info['phone']} ({info['source']})")
+        else:
+            lines.append(f"- {info['name']} <{key}> ({info['source']})")
    return {"output": "\n".join(lines), "exit_code": 0}


@@ -94,6 +94,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
    "manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
    "manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
+    "api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
    "manage_tokens": "API token management: list, create, or delete API access tokens.",
    "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
    "manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
@@ -134,6 +135,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter=<keyword> lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.",
    "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.",
    "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.",
+    "manage_bg_jobs": "Inspect and control detached background `bash` jobs (the ones started with a `#!bg` marker). action='list' shows this chat's jobs (id/status/age/command); action='output' returns a job's captured output so far (check on a long-running job, or re-read a finished one); action='kill' stops a runaway job by id. Use for 'is the background job done', 'check on that job', 'show the build output', 'kill the background job', 'stop the bg task'. output/kill need a job_id from list.",
 }


@@ -348,6 +350,12 @@ class ToolIndex:
            {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"},
        frozenset({"calendar", "event", "meeting", "schedule", "appointment"}):
            {"manage_calendar"},
+        # Detached background `bash` jobs (#!bg): check on / read output / kill.
+        frozenset({"background job", "background jobs", "bg job", "bg jobs",
+                   "background task", "is the job done", "check the job",
+                   "check on that job", "job output", "kill the job",
+                   "kill the background", "stop the background", "running job"}):
+            {"manage_bg_jobs"},
        frozenset({"note", "todo", "reminder", "remind", "checklist", "remember to"}):
            {"manage_notes"},
        # Chat/session management. "rename" alone maps to documents below, so a
@@ -414,6 +422,14 @@ class ToolIndex:
                   "my settings", "change setting", "change a setting", "set setting",
                   "preference", "preferences", "configure"}):
            {"manage_settings", "ui_control"},
+        # API-integration intent → the api_call tool. Mirrors the agent-loop
+        # "integrations" domain so api_call still surfaces on the retrieval and
+        # keyword-fallback paths (not just the deterministic domain seed) when a
+        # user names a connected service.
+        frozenset({"api_call", "api call", "integration", "integrations",
+                   "home assistant", "homeassistant", "miniflux", "gitea",
+                   "linkding", "jellyfin"}):
+            {"api_call"},
        # Managing EXISTING research in the Library — open/read/find/delete.
        frozenset({"my research", "the research", "research on", "open research",
                   "read research", "find research", "delete research",
@@ -175,6 +175,9 @@ _TOOL_NAME_MAP = {
    "notes": "manage_notes",
    "todo": "manage_notes",
    "todos": "manage_notes",
+    "manage_bg_jobs": "manage_bg_jobs",
+    "bg_jobs": "manage_bg_jobs",
+    "background_jobs": "manage_bg_jobs",
 }

 _MISFENCED_WEB_TOOL_NAMES = {
@@ -68,11 +68,12 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "web_fetch",
-            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research).",
+            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research). Downloads are size-budgeted; a '[partial content: ...]' notice in the result means the body was cut short and you can re-call with full=true for the rest.",
            "parameters": {
                "type": "object",
                "properties": {
-                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"}
+                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"},
+                    "full": {"type": "boolean", "description": "Raise the download budget to the hard cap for large pages/files. Use only after a result reported partial content."}
                },
                "required": ["url"]
            }
@@ -1008,7 +1009,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "resolve_contact",
-            "description": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]' or 'email [name]' without an email address.",
+            "description": "Look up a contact by name. Searches CardDAV address book and sent email history. Returns email addresses (when available) or phone numbers. Use when the user says 'message [name]', 'email [name]', or asks for someone's contact details.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -1187,6 +1188,21 @@ FUNCTION_TOOL_SCHEMAS = [
            }
        }
    },
+    {
+        "type": "function",
+        "function": {
+            "name": "manage_bg_jobs",
+            "description": "Inspect and control detached background `bash` jobs (started with the `#!bg` marker). action='list' shows this chat's jobs with id/status/age/command; action='output' returns a job's captured output so far (use for a still-running job, or to re-read a finished one); action='kill' terminates a runaway job's process tree instead of waiting out its max-runtime. output and kill need job_id from list.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "action": {"type": "string", "enum": ["list", "output", "kill"], "description": "list | output | kill (default: list)"},
+                    "job_id": {"type": "string", "description": "Background job id (required for output/kill; from action='list')"},
+                },
+                "required": ["action"]
+            }
+        }
+    },
 ]


@@ -1205,23 +1221,26 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
        logger.error(f"Failed to parse function call arguments for {name}: {arguments}")
        return None

+    tool_type = _TOOL_NAME_MAP.get(name, name)
+    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
+                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
+
    # Some models emit valid JSON that isn't an object (e.g. a bare array
-    # ["ls -la"], string, or number) as the function arguments. Every branch
-    # below assumes a dict and calls args.get(...), so a non-dict would raise
-    # AttributeError and abort the whole agent stream. Coerce to {} instead.
+    # ["ls -la"], string, or number) as function arguments. Most local tools keep
+    # the legacy empty-object coercion for stream robustness, but email MCP tools
+    # must fail closed so a malformed call cannot read the default mailbox.
    if not isinstance(args, dict):
+        if tool_type.startswith("mcp__email__") or name in _BUILTIN_EMAIL_TOOLS:
+            logger.warning(f"Non-object email function call arguments for {name}: {args!r}; rejecting")
+            return None
        logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty")
        args = {}

-    tool_type = _TOOL_NAME_MAP.get(name, name)
-
    # Allow MCP tools through (namespaced as mcp__serverid__toolname)
    if tool_type.startswith("mcp__"):
        content = json.dumps(args) if args else "{}"
        return ToolBlock(tool_type, content)
    # Email tools are implemented as MCP — route them to email
-    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
-                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    if name in _BUILTIN_EMAIL_TOOLS:
        return ToolBlock(f"mcp__email__{name}", json.dumps(args) if args else "{}")
    if tool_type not in TOOL_TAGS:
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
 NON_ADMIN_BLOCKED_TOOLS = {
    "bash",
    "python",
+    "manage_bg_jobs",
    "read_file",
    "write_file",
    "edit_file",
@@ -114,6 +115,8 @@ _PLAN_MODE_KNOWN_MUTATORS = {
    # Shell is never read-only-safe; block it explicitly so it stays out of plan
    # mode even if the schema list fails to load.
    "bash", "python",
+    # Controls shell processes (kill); plan mode can't run bash anyway.
+    "manage_bg_jobs",
 }