Merge remote-tracking branch 'origin/dev' into test-main-dev-merge-20260615

# Conflicts: # src/tool_implementations.py # static/js/research/panel.js
2026-06-30 00:22:10 -04:00 · 2026-06-15 21:20:15 +09:00
parent 2cbd55b8bd cd02ac7ef6
commit 6d507f8128
312 changed files with 20047 additions and 2952 deletions
@@ -0,0 +1,31 @@
+import re
+
+from fastapi import HTTPException
+
+
+# Shape of an acceptable remote host: "host" or "user@host". Each part must
+# start with an ASCII letter or digit and may continue with letters, digits,
+# ".", "_" or "-", so a value beginning with "-" never fits the pattern.
+_REMOTE_HOST_RE = re.compile(
+    r"^(?:[A-Za-z0-9][A-Za-z0-9._-]*@)?[A-Za-z0-9][A-Za-z0-9._-]*$"
+)
+# One to five digits. The numeric 1-65535 range is checked separately in
+# validate_ssh_port after the text has been converted to an int.
+_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
+
+
+def validate_remote_host(v: str | None) -> str | None:
+    """Validate a remote host given as ``host`` or ``user@host``.
+
+    Returns None when ``v`` is None or the empty string, otherwise returns
+    ``v`` unchanged once it has been checked against ``_REMOTE_HOST_RE``.
+
+    Raises:
+        HTTPException: status 400 when ``v`` does not fit the pattern.
+    """
+    if v is None or v == "":
+        return None
+    # fullmatch(), not match(): with match() the pattern's trailing "$" also
+    # succeeds just before a final newline, so a value such as "host\n"
+    # would slip through validation. fullmatch() requires the whole string
+    # to be consumed, which is also how validate_ssh_port checks its input.
+    if not _REMOTE_HOST_RE.fullmatch(v):
+        raise HTTPException(
+            400,
+            "Invalid remote_host — must be host or user@host, no SSH option syntax",
+        )
+    return v
+
+
+def validate_ssh_port(v: str | None) -> str | None:
+    """Validate an SSH port and return it as a normalized decimal string.
+
+    None and the empty string yield None. Any other value must be one to
+    five digits and fall within 1-65535; leading zeros are dropped from the
+    returned string because it is rebuilt from the parsed integer.
+
+    Raises:
+        HTTPException: status 400 for a malformed or out-of-range port.
+    """
+    if v is None or v == "":
+        return None
+    candidate = str(v)
+    if _SSH_PORT_RE.fullmatch(candidate) is None:
+        raise HTTPException(400, "Invalid ssh_port")
+    number = int(v)
+    if not 1 <= number <= 65535:
+        raise HTTPException(400, "Invalid ssh_port")
+    return str(number)
@@ -68,6 +68,7 @@ def _normalize_scopes(scopes: str | list[str] | None = None, profile: str | None
    ensure_before("calendar:write", "calendar:read")
    ensure_before("memory:write", "memory:read")
    ensure_before("email:draft", "email:read")
+    ensure_before("cookbook:launch", "cookbook:read")

    return normalized or [DEFAULT_SCOPES]

@@ -154,6 +155,7 @@ def setup_api_token_routes() -> APIRouter:
    @router.patch("/tokens/{token_id}")
    async def update_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        try:
            payload = await request.json()
        except Exception:
@@ -162,6 +164,8 @@ def setup_api_token_routes() -> APIRouter:
            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
            if isinstance(payload.get("name"), str) and payload["name"].strip():
                token.name = payload["name"].strip()[:MAX_NAME_LEN]
            # Only touch scopes when the caller actually sent them. A partial
@@ -189,10 +193,14 @@ def setup_api_token_routes() -> APIRouter:
    @router.delete("/tokens/{token_id}")
    def delete_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        with get_db_session() as db:
-            deleted = db.query(ApiToken).filter(ApiToken.id == token_id).delete()
-            if not deleted:
+            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
+            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
+            db.delete(token)
        _invalidate_cache(request)
        return {"status": "deleted"}

@@ -7,7 +7,13 @@ import asyncio
 import logging
 import os

-from core.auth import AuthManager
+import json
+import re
+from pathlib import Path
+
+from core.atomic_io import atomic_write_json, atomic_write_text
+from core.auth import AuthManager, SetAdminResult
+from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
 from src.rate_limiter import RateLimiter
 from src.settings_scrub import scrub_settings
 from src.settings import (
@@ -67,6 +73,11 @@ class DeleteUserRequest(BaseModel):
 class RenameUserRequest(BaseModel):
    username: str

+
+class SetAdminRequest(BaseModel):
+    # Desired admin state for the target user: True promotes, False demotes.
+    is_admin: bool
+
+
 class SetOpenRegistrationRequest(BaseModel):
    enabled: bool

@@ -291,9 +302,30 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        if new_username in auth_manager.users:
            raise HTTPException(409, "Username already taken")

+        # Gate on auth first. Every mutation below is contingent on this
+        # succeeding — doing it last meant a rejected rename (e.g. reserved
+        # username) left file-backed owner fields already rewritten with no
+        # way to roll them back.
+        ok = auth_manager.rename_user(old_username, new_username, user)
+        if not ok:
+            raise HTTPException(400, "Cannot rename user")
+
+        def _rollback_auth_rename() -> bool:
+            """Reverse the auth rename (new -> old); return True on success."""
+            # A self-rename has already moved the acting session to the new
+            # username, so the reverse rename has to run as that new name.
+            if user == old_username:
+                acting_as = new_username
+            else:
+                acting_as = user
+            try:
+                reverted = auth_manager.rename_user(new_username, old_username, acting_as)
+                return bool(reverted)
+            except Exception as rollback_err:
+                # Report the failure through the return value; the caller
+                # decides how to surface it.
+                logger.error(
+                    "Failed to roll back auth rename %s -> %s after owner migration failure: %s",
+                    new_username, old_username, rollback_err,
+                )
+                return False
+
        # Usernames are ownership keys for user data. Rename the common
-        # owner-scoped DB rows before changing auth so the account keeps
-        # access to its sessions, docs, email accounts, tasks, etc.
+        # owner-scoped DB rows so the account keeps access to its sessions,
+        # docs, email accounts, tasks, etc.
        try:
            from sqlalchemy import func
            from core.database import Base, SessionLocal
@@ -316,6 +348,11 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                db.close()
        except Exception as e:
            logger.error("Failed to rename owner references %s -> %s: %s", old_username, new_username, e)
+            if not _rollback_auth_rename():
+                logger.error(
+                    "Auth rename %s -> %s could not be rolled back after owner migration failure",
+                    old_username, new_username,
+                )
            raise HTTPException(500, "Failed to rename user data")

        # Per-user prefs are JSON-backed, not SQL-backed.
@@ -335,9 +372,116 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        except Exception as e:
            logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)

-        ok = auth_manager.rename_user(old_username, new_username, user)
-        if not ok:
-            raise HTTPException(400, "Cannot rename user")
+        # In-flight deep-research tasks live in the process-local
+        # ResearchHandler registry. They are not covered by the persisted JSON
+        # migration above, but the research routes filter and cancel by this
+        # owner field while the job is running. Do this before sweeping
+        # completed JSON files so a job that finishes during the rename saves
+        # with the new owner or is caught by the disk sweep below.
+        try:
+            rh = getattr(request.app.state, "research_handler", None)
+            rename_owner = getattr(rh, "rename_owner", None)
+            if callable(rename_owner):
+                rename_owner(old_username, new_username)
+        except Exception as e:
+            logger.warning("Failed to rename active research tasks %s -> %s: %s", old_username, new_username, e)
+
+        # deep_research: each completed report is a standalone JSON file with
+        # an `owner` field. research_routes filters by d.get("owner") == user,
+        # so a stale owner makes every report invisible to the renamed user.
+        try:
+            dr_dir = Path(DEEP_RESEARCH_DIR)
+            if dr_dir.is_dir():
+                for p in dr_dir.glob("*.json"):
+                    try:
+                        d = json.loads(p.read_text(encoding="utf-8"))
+                        if str(d.get("owner", "")).strip().lower() == old_username:
+                            d["owner"] = new_username
+                            atomic_write_json(str(p), d)
+                    except Exception as err:
+                        logger.warning("Failed to update research owner in %s: %s", p.name, err)
+        except Exception as e:
+            logger.warning("Failed to rename research owner references %s -> %s: %s", old_username, new_username, e)
+
+        # memory.json: a flat JSON array where each entry carries an `owner`
+        # field. memory_manager.load(owner=user) filters on it, so stale
+        # entries disappear from the memory panel.
+        try:
+            if os.path.isfile(MEMORY_FILE):
+                with open(MEMORY_FILE, encoding="utf-8") as fh:
+                    entries = json.loads(fh.read())
+                if isinstance(entries, list):
+                    changed = False
+                    for entry in entries:
+                        if isinstance(entry, dict) and str(entry.get("owner", "")).strip().lower() == old_username:
+                            entry["owner"] = new_username
+                            changed = True
+                    if changed:
+                        atomic_write_json(MEMORY_FILE, entries)
+        except Exception as e:
+            logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
+
+        # uploads.json: upload rows use owner metadata for access checks and
+        # owner-prefixed index keys for dedupe. Rename both so attachments keep
+        # resolving after the account username changes.
+        try:
+            upload_handler = getattr(request.app.state, "upload_handler", None)
+            rename_owner = getattr(upload_handler, "rename_owner", None)
+            if callable(rename_owner):
+                rename_owner(old_username, new_username)
+        except Exception as e:
+            logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
+
+        # skills: SKILL.md frontmatter carries owner: <username>; the usage
+        # sidecar (_usage.json) keys entries as owner::skill-name. Both must
+        # be updated or the renamed user's Skills panel goes empty.
+        try:
+            skills_root = Path(SKILLS_DIR)
+            if skills_root.is_dir():
+                _owner_re = re.compile(
+                    r'(?m)^(owner:\s*)' + re.escape(old_username) + r'\s*$',
+                    re.IGNORECASE,
+                )
+                for p in skills_root.rglob("SKILL.md"):
+                    try:
+                        text = p.read_text(encoding="utf-8")
+                        new_text = _owner_re.sub(r'\g<1>' + new_username, text)
+                        if new_text != text:
+                            atomic_write_text(str(p), new_text)
+                    except Exception as err:
+                        logger.warning("Failed to update skill owner in %s: %s", p, err)
+                usage_path = skills_root / "_usage.json"
+                if usage_path.is_file():
+                    try:
+                        usage = json.loads(usage_path.read_text(encoding="utf-8"))
+                        if isinstance(usage, dict):
+                            new_usage = {}
+                            changed = False
+                            for k, v in usage.items():
+                                owner_part, sep, skill_part = k.partition("::")
+                                if sep and owner_part.lower() == old_username:
+                                    new_usage[new_username + "::" + skill_part] = v
+                                    changed = True
+                                else:
+                                    new_usage[k] = v
+                            if changed:
+                                atomic_write_json(str(usage_path), new_usage)
+                    except Exception as err:
+                        logger.warning("Failed to update skills usage keys %s -> %s: %s", old_username, new_username, err)
+        except Exception as e:
+            logger.warning("Failed to rename skills owner references %s -> %s: %s", old_username, new_username, e)
+
+        # The in-memory session cache (session_manager.sessions) stores each
+        # session's owner at load time. Without this patch the renamed user's
+        # sessions are invisible on the next /api/sessions call because
+        # get_sessions_for_user does an exact `s.owner == username` comparison
+        # against stale in-memory values.
+        sm = getattr(request.app.state, "session_manager", None)
+        if sm is not None:
+            for sess in list(getattr(sm, "sessions", {}).values()):
+                if str(getattr(sess, "owner", None) or "").strip().lower() == old_username:
+                    sess.owner = new_username
+
        # The owner-rename loop above updated ApiToken.owner in the DB, but the
        # bearer-token cache still maps each token to the OLD owner. Without
        # refreshing it, the renamed user's API tokens resolve to the old (now
@@ -348,6 +492,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            invalidator()
        return {"ok": True, "username": new_username, "renamed_self": old_username == user}

+    @router.put("/users/{username}/admin")
+    async def set_user_admin(username: str, body: SetAdminRequest, request: Request):
+        """Promote/demote a user to/from admin. Admin only.
+
+        The last remaining admin can't be demoted (no lockout). Self-demotion
+        is allowed while another admin exists; the `self` flag tells the UI to
+        reload the acting user into the normal-user view.
+        """
+        acting_user = _get_current_user(request)
+        if not (acting_user and auth_manager.is_admin(acting_user)):
+            raise HTTPException(403, "Admin only")
+        outcome = auth_manager.set_admin(username, body.is_admin, acting_user)
+        # Each refusal reported by the auth layer maps to one HTTP error.
+        # Identity comparison is used so only the exact result members match.
+        refusals = (
+            (SetAdminResult.USER_NOT_FOUND, 404, "User not found"),
+            (SetAdminResult.NOT_AUTHORIZED, 403, "Admin only"),
+            (SetAdminResult.LAST_ADMIN, 400, "Cannot demote the last admin"),
+        )
+        for refusal, status_code, detail in refusals:
+            if outcome is refusal:
+                raise HTTPException(status_code, detail)
+        # Compare normalized names so case or padding differences do not
+        # hide the fact that the admin just changed their own role.
+        normalized_target = (username or "").strip().lower()
+        normalized_actor = (acting_user or "").strip().lower()
+        return {
+            "ok": True,
+            "is_admin": body.is_admin,
+            "self": normalized_target == normalized_actor,
+        }
+
    @router.post("/signup-toggle", deprecated=True)
    async def toggle_signup(request: Request):
        """
@@ -378,7 +547,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        user = _get_current_user(request)
        if not user or not auth_manager.is_admin(user):
            raise HTTPException(403, "Admin only")
-        ok = auth_manager.delete_user(body.username, user)
+
+        def _invalidate_api_token_cache():
+            """Best-effort call of the app's bearer-token cache invalidator.
+
+            Looks up ``invalidate_token_cache`` on ``request.app.state`` and
+            calls it when present. Never raises: a failure here must not
+            break the surrounding delete flow.
+            """
+            try:
+                invalidator = getattr(request.app.state, "invalidate_token_cache", None)
+                if invalidator:
+                    invalidator()
+            except Exception:
+                # Still swallowed on purpose, but no longer silent: a failed
+                # invalidation can leave a deleted user's cached token valid,
+                # so operators need a trace of it in the log.
+                logger.warning("Failed to invalidate API token cache", exc_info=True)
+
+        try:
+            ok = auth_manager.delete_user(body.username, user)
+        except Exception:
+            # delete_user can touch ApiToken rows before a later auth-store write
+            # fails. Dirty the bearer cache anyway so a partial token purge does
+            # not leave already-cached tokens authenticating until restart.
+            _invalidate_api_token_cache()
+            raise
        if not ok:
            raise HTTPException(400, "Cannot delete user")
        # delete_user removes the user's ApiToken rows, but the bearer-auth
@@ -386,12 +571,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        # rebuilds when flagged dirty. Without this, a deleted user's already
        # cached token keeps authenticating until some other token op or a
        # restart clears the cache. Mirror what the token routes do.
-        try:
-            invalidator = getattr(request.app.state, "invalidate_token_cache", None)
-            if invalidator:
-                invalidator()
-        except Exception:
-            pass
+        _invalidate_api_token_cache()
        return {"ok": True}

    # ---- Feature visibility (admin-managed) ----
@@ -101,11 +101,17 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
        # ── Skills ──
        if "skills" in body and isinstance(body["skills"], list):
            existing = skills_manager.load_all()
-            existing_names = {s.get("name") for s in existing if s.get("name")}
-            existing_ids = {s.get("id") for s in existing if s.get("id")}
+            # Dedup against THIS user's own skills only. Using every tenant's
+            # rows (load_all) meant a skill whose id/name/title matched any
+            # other user's was silently skipped, so the importing user lost
+            # their own data — same cross-tenant bug fixed for memories above.
+            # The full store is still saved back below.
+            own = [s for s in existing if s.get("owner") == user]
+            existing_names = {s.get("name") for s in own if s.get("name")}
+            existing_ids = {s.get("id") for s in own if s.get("id")}
            existing_titles = {
                (s.get("title") or s.get("description") or "").strip().lower()
-                for s in existing
+                for s in own
            }
            added = 0
            for skill in body["skills"]:
@@ -11,7 +11,7 @@ from pydantic import BaseModel
 from sqlalchemy import or_, and_
 from dateutil.rrule import rrulestr

-from core.database import SessionLocal, CalendarCal, CalendarEvent
+from core.database import SessionLocal, CalendarCal, CalendarDeletedEvent, CalendarEvent
 from src.auth_helpers import require_user
 from src.upload_limits import read_upload_limited, ICS_MAX_BYTES

@@ -126,6 +126,54 @@ def _resolve_base_uid(uid: str) -> str:
        raise ValueError("malformed compound UID: missing base before ::")
    return base

+
+async def _push_caldav_event_after_commit(owner: str, uid: str, action: str):
+    """Push one already-committed local event change to CalDAV, best-effort.
+
+    ``action`` selects the push ("create", "update" or "delete"); any other
+    value performs no push. A failed push is logged and never raised, so the
+    local write stays authoritative. After a failed create/update the event's
+    ``caldav_sync_pending`` is set to the action in a fresh session, leaving
+    a pending flag that /sync can use to retry later.
+    """
+    try:
+        outcome = {"ok": True}
+        # Import lazily so only the pusher that is actually needed is loaded.
+        if action == "create":
+            from src.caldav_sync import push_event_create
+            outcome = await push_event_create(owner, uid)
+        elif action == "update":
+            from src.caldav_sync import push_event_update
+            outcome = await push_event_update(owner, uid)
+        elif action == "delete":
+            from src.caldav_sync import push_event_delete
+            outcome = await push_event_delete(owner, uid)
+        # A falsy result, an "ok" result and a "skipped" result all count as
+        # success; anything else is turned into an error for the handler below.
+        failed = outcome and not (outcome.get("ok") or outcome.get("skipped"))
+        if failed:
+            raise RuntimeError(outcome.get("error") or outcome)
+    except Exception as push_err:
+        logger.warning("CalDAV %s push failed for uid=%s: %s", action, uid, push_err)
+        if action not in {"create", "update"}:
+            return
+        # Use a separate session: the caller's transaction is already committed.
+        session = SessionLocal()
+        try:
+            event = _get_or_404_event(session, uid, owner)
+            event.caldav_sync_pending = action
+            session.commit()
+        except Exception:
+            session.rollback()
+        finally:
+            session.close()
+
+
+def _record_caldav_delete_tombstone(db, ev: CalendarEvent, owner: str) -> None:
+    """Create or refresh the CalendarDeletedEvent row for a CalDAV event.
+
+    Does nothing unless the event's calendar has source "caldav". The row is
+    looked up by (uid, owner) and added to ``db`` when missing; its remote
+    fields are then overwritten from ``ev``. The caller is responsible for
+    committing the session.
+    """
+    calendar = ev.calendar
+    if not calendar or calendar.source != "caldav":
+        return
+    marker = (
+        db.query(CalendarDeletedEvent)
+        .filter(
+            CalendarDeletedEvent.uid == ev.uid,
+            CalendarDeletedEvent.owner == owner,
+        )
+        .first()
+    )
+    if not marker:
+        marker = CalendarDeletedEvent(uid=ev.uid, owner=owner)
+        db.add(marker)
+    # Snapshot what is known about the remote copy before the local row goes.
+    marker.calendar_id = ev.calendar_id
+    marker.remote_href = ev.remote_href
+    marker.remote_etag = ev.remote_etag
+    marker.caldav_base_url = getattr(calendar, "caldav_base_url", None)
+    marker.summary = ev.summary or ""
+    # Clear any error recorded on an existing row.
+    marker.last_error = None
+
 # ── Pydantic models ──

 class EventCreate(BaseModel):
@@ -843,36 +891,35 @@ def setup_calendar_routes() -> APIRouter:
            return {"ok": False, "error": str(e)[:200]}

    @router.post("/sync")
-    async def sync_caldav_endpoint(request: Request):
-        """Pull events from the configured CalDAV server into local DB.
+    async def sync_caldav_endpoint(request: Request, direction: str = "pull"):
+        """Sync events with the configured CalDAV server.
        Returns counts + any per-calendar errors. Called by the frontend
        on calendar open and by the periodic scheduler loop."""
        owner = _require_user(request)
-        from src.caldav_sync import sync_caldav
-        return await sync_caldav(owner)
+        from src.caldav_sync import sync_caldav_direction
+        return await sync_caldav_direction(owner, direction)
+

    @router.delete("/calendars/{cal_id}")
-    async def delete_calendar(cal_id: str, request: Request):
+    async def delete_calendar(request: Request, cal_id: str):
        owner = _require_user(request)
        db = SessionLocal()
        try:
-            cal = db.query(CalendarCal).filter(
-                CalendarCal.id == cal_id,
-                CalendarCal.owner == owner,
-            ).first()
-            if not cal:
-                raise HTTPException(404, "Calendar not found")
+            cal = _get_or_404_calendar(db, cal_id, owner)
+            db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
            db.delete(cal)
            db.commit()
            return {"ok": True}
        except HTTPException:
            raise
        except Exception as e:
+            db.rollback()
            logger.error("Failed to delete calendar %s: %s", cal_id, e)
            raise HTTPException(500, "Failed to delete calendar")
        finally:
            db.close()

+
    @router.get("/calendars")
    async def list_calendars(request: Request):
        owner = _require_user(request)
@@ -1003,19 +1050,12 @@ def setup_calendar_routes() -> APIRouter:
                is_utc=_is_utc and not data.all_day,
                rrule=data.rrule or "",
                color=data.color or None,
+                caldav_sync_pending="create" if cal.source == "caldav" else None,
            )
            db.add(ev)
            db.commit()
            if cal.source == "caldav":
-                # Push the new event to the remote so it appears on the user's
-                # other devices — the sync is otherwise pull-only (#800).
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, cal.source, cal.id, {
-                    "uid": uid, "summary": data.summary, "description": data.description,
-                    "location": data.location, "dtstart": dtstart, "dtend": dtend,
-                    "all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
-                    "rrule": data.rrule or "",
-                })
+                await _push_caldav_event_after_commit(owner, uid, "create")
            return {"ok": True, "uid": uid}
        except HTTPException:
            raise
@@ -1061,15 +1101,12 @@ def setup_calendar_routes() -> APIRouter:
                ev.rrule = data.rrule
            if data.color is not None:
                ev.color = data.color if data.color else None
+            is_caldav = ev.calendar and ev.calendar.source == "caldav"
+            if is_caldav:
+                ev.caldav_sync_pending = "update"
            db.commit()
-            cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
-            if cal and cal.source == "caldav":
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, cal.source, cal.id, {
-                    "uid": ev.uid, "summary": ev.summary, "description": ev.description,
-                    "location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
-                    "all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
-                })
+            if is_caldav:
+                await _push_caldav_event_after_commit(owner, base_uid, "update")
            return {"ok": True}
        except HTTPException:
            raise
@@ -1090,15 +1127,13 @@ def setup_calendar_routes() -> APIRouter:
        db = SessionLocal()
        try:
            ev = _get_or_404_event(db, base_uid, owner)
-            # Capture what the remote push needs BEFORE the row is gone.
-            _cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
-            _is_caldav = bool(_cal and _cal.source == "caldav")
-            _cal_id, _ev_uid = ev.calendar_id, ev.uid
+            is_caldav = ev.calendar and ev.calendar.source == "caldav"
+            if is_caldav:
+                _record_caldav_delete_tombstone(db, ev, owner)
            db.delete(ev)
            db.commit()
-            if _is_caldav:
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
+            if is_caldav:
+                await _push_caldav_event_after_commit(owner, base_uid, "delete")
            return {"ok": True}
        except HTTPException:
            raise
@@ -1152,23 +1187,6 @@ def setup_calendar_routes() -> APIRouter:
        finally:
            db.close()

-    @router.delete("/calendars/{cal_id}")
-    async def delete_calendar(request: Request, cal_id: str):
-        owner = _require_user(request)
-        db = SessionLocal()
-        try:
-            cal = _get_or_404_calendar(db, cal_id, owner)
-            db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
-            db.delete(cal)
-            db.commit()
-            return {"ok": True}
-        except HTTPException:
-            raise
-        except Exception as e:
-            db.rollback()
-            return {"error": str(e)}
-        finally:
-            db.close()

    # Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole
    # file into memory is unavoidable with python-icalendar, so an unbounded
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
            return

        owner = getattr(sess, "owner", None)
-        t_url, t_model, t_headers = resolve_task_endpoint(
-            sess.endpoint_url, sess.model, sess.headers, owner=owner,
-        )
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
+        if not t_model:
+            # If no task/utility model is configured at all, fall back to
+            # the session's own model so auto-naming still works even on
+            # minimal setups.
+            from src.endpoint_resolver import resolve_endpoint
+            _fallback = resolve_endpoint("default", owner=owner)
+            if _fallback and _fallback[1]:
+                t_url, t_model, t_headers = _fallback
+            else:
+                t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
        if not t_model:
            logger.debug("[auto-name] No model provided, skipping")
            return
@@ -497,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
    return None


+def _session_is_research_spinoff(sess) -> bool:
+    """Report whether the session history carries a research spin-off primer.
+
+    A session qualifies when any ``system``-role message in ``sess.history``
+    has a truthy ``research_spinoff_from`` entry in its metadata. Messages may
+    be objects with ``role``/``metadata`` attributes or plain dicts with the
+    same keys. A missing or empty history yields False.
+    """
+    def _field(msg, key):
+        # Attribute access first; fall back to dict lookup for plain dicts.
+        value = getattr(msg, key, None)
+        if value is None and isinstance(msg, dict):
+            value = msg.get(key)
+        return value
+
+    history = getattr(sess, "history", []) or []
+    return any(
+        _field(msg, "role") == "system"
+        and (_field(msg, "metadata") or {}).get("research_spinoff_from")
+        for msg in history
+    )
+
+
 async def build_chat_context(
    sess,
    request,
@@ -562,9 +593,17 @@ async def build_chat_context(
        mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
    )

+    # Research-spinoff ("Discuss") sessions are grounded on the seeded report:
+    # the primer system message IS the knowledge base. Injecting global memory
+    # or personal-doc RAG on every turn pulls in keyword-matched but off-topic
+    # facts ("wrong data") and competes with the report, so suppress both here.
+    is_research_spinoff = _session_is_research_spinoff(sess)
+    if is_research_spinoff:
+        mem_enabled = False
+
    # Use RAG?
    use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
-    if incognito or not allow_tool_preprocessing:
+    if incognito or not allow_tool_preprocessing or is_research_spinoff:
        use_rag_val = False

    # If pre-fetched search context was provided (compare mode), skip live web search
@@ -587,7 +626,7 @@ async def build_chat_context(
        incognito=incognito,
        use_skills=skills_enabled,
    )
-    if use_rag is not None:
+    if use_rag is not None or is_research_spinoff:
        _preface_kwargs["use_rag"] = use_rag_val
    preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)

@@ -615,6 +654,26 @@ async def build_chat_context(
    # Build messages
    messages = preface + sess.get_context_messages()

+    # Current date/time — injected as a standalone *user*-role context message
+    # placed immediately before the latest user turn, NOT folded into the
+    # system prompt. Its text changes every minute, and local OpenAI-compatible
+    # backends (llama.cpp / LM Studio) key their KV-cache prefix off the
+    # system message byte-for-byte; mixing ever-changing timestamp text into
+    # it would invalidate the cached prefix on every request (issue #2927).
+    # Placing it at the tail also keeps it out of the stable
+    # preface+history prefix, so that prefix stays byte-identical turn over
+    # turn (modulo the genuinely new history entries) and the cache survives.
+    if not agent_mode:
+        try:
+            from src.user_time import current_datetime_context_message
+            _dt_msg = current_datetime_context_message()
+            if messages and messages[-1].get("role") == "user":
+                messages.insert(len(messages) - 1, _dt_msg)
+            else:
+                messages.append(_dt_msg)
+        except Exception:
+            logger.debug("Failed to add current date/time context", exc_info=True)
+
    # Auto-compact
    messages, context_length, was_compacted = await maybe_compact(
        sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user,
@@ -911,6 +970,54 @@ def save_assistant_response(
    return None


+def _is_session_stream_active(session_id: str) -> bool:
+    """Report whether a chat completion is streaming for ``session_id`` now.
+    Best-effort: chat_routes is imported lazily (it imports this module at
+    load time, so a top-level import would be circular) and any failure is
+    reported as "not streaming". Lets extraction avoid the stream (#2927)."""
+    try:
+        from routes import chat_routes as route_module
+        live_streams = getattr(route_module, "_active_streams", {})
+        return session_id in live_streams
+    except Exception:
+        return False
+
+
+async def _run_extraction_jobs_sequentially(session_id: str, jobs: list, max_wait_s: float = 120.0):
+    """Run queued background-extraction coroutines one at a time, only once
+    no chat completion is actively streaming for this session (issue #2927).
+
+    Running memory/skill extraction alongside the main completion (or each
+    other) makes them compete for the local backend's limited processing
+    slots and evicts the conversation's cached KV-cache checkpoint, so the
+    jobs are serialised. ``jobs`` is a list of ``(name, awaitable)`` pairs;
+    ``name`` only labels the warning logged when a job raises, and a failure
+    does not stop later jobs. Each wait polls every 0.25 s and gives up after
+    roughly ``max_wait_s`` seconds; the job then runs even if a stream is
+    still registered for the session.
+    """
+    # Wait for the triggering turn's own stream to finish winding down (it
+    # almost always already has by the time this task gets scheduled — this
+    # is a small safety margin, not the primary mechanism).
+    waited = 0.0
+    poll = 0.25  # seconds between registry checks
+    while _is_session_stream_active(session_id) and waited < max_wait_s:
+        await asyncio.sleep(poll)
+        waited += poll  # counts sleep intervals, not wall-clock time
+
+    for name, job in jobs:
+        # Re-check before each job: a fast follow-up message from the user
+        # may have started a new stream for this session while we waited.
+        waited = 0.0  # every job gets a fresh wait budget
+        while _is_session_stream_active(session_id) and waited < max_wait_s:
+            await asyncio.sleep(poll)
+            waited += poll
+        try:
+            await job
+        except Exception:
+            logger.warning("[bg-extract] %s extraction job failed for session %s", name, session_id, exc_info=True)
+
+
 def run_post_response_tasks(
    sess,
    session_manager,
@@ -933,7 +1040,22 @@ def run_post_response_tasks(
    extract_skills: bool = True,
    allow_background_extraction: bool = True,
 ):
-    """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction."""
+    """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.
+
+    Memory/skill extraction are queued to run *sequentially*, after the main
+    completion stream for this session has fully wound down — never
+    concurrently with it or with each other. As diagnosed in issue #2927,
+    firing these "side" LLM calls in parallel with the main chat completion
+    makes them compete for the local backend's limited processing slots
+    (llama.cpp defaults to 4), evicting the main conversation's cached
+    checkpoint and forcing a full prompt re-evaluation on the next turn. By
+    the time this function runs the main response is already saved, but the
+    extraction calls themselves are still async — queuing them through
+    ``_queue_background_extraction`` keeps them from overlapping the *next*
+    turn's request too.
+    """
+    _extraction_jobs: list = []
+
    # Memory extraction — only every 4th message pair to avoid excess LLM calls
    _msg_count = len(sess.history) if hasattr(sess, 'history') else 0
    _should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0)
@@ -943,10 +1065,10 @@ def run_post_response_tasks(
        t_url, t_model, t_headers = resolve_task_endpoint(
            sess.endpoint_url, sess.model, sess.headers, owner=owner,
        )
-        asyncio.create_task(extract_and_store(
+        _extraction_jobs.append(("memory", extract_and_store(
            sess, memory_manager, memory_vector,
            t_url, t_model, t_headers,
-        ))
+        )))

    # Skill extraction from complex agent runs. Only when the user actually
    # chose agent mode — not a chat we auto-escalated for a notes/calendar
@@ -982,12 +1104,15 @@ def run_post_response_tasks(
                sess.endpoint_url, sess.model, sess.headers, owner=owner,
            )
            logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
-            asyncio.create_task(maybe_extract_skill(
+            _extraction_jobs.append(("skill", maybe_extract_skill(
                sess, skills_manager,
                s_url, s_model, s_headers,
                agent_rounds, agent_tool_calls,
                owner=owner,
-            ))
+            )))
+
+    if _extraction_jobs:
+        asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))

    # Token accumulation
    if last_metrics:
@@ -62,6 +62,33 @@ def _stream_set(session_id: str, **fields) -> None:
    rec.update(fields)


+def _resolve_request_workspace(request, raw_value) -> tuple:
+    """Resolve the posted workspace for this request: (workspace, rejected).
+
+    Privilege is checked BEFORE the path ever touches the filesystem. Only
+    admin/single-user callers can use the workspace-backed file/shell tools,
+    so only they get vet_workspace() and the workspace_rejected signal. For
+    any other caller the submitted value is dropped uniformly, with no vetting
+    and no event: otherwise the presence/absence of workspace_rejected would
+    let a non-admin chat caller probe which host paths exist.
+
+    vet_workspace rejects non-directories, sensitive roots (.ssh, .gnupg,
+    ...), and filesystem roots; on rejection there is no confinement and the
+    default tool-path allowlist applies. The rejected value is surfaced so the
+    stream can tell an admin client (which believes a workspace is active)
+    that it was dropped.
+    """
+    requested = (raw_value or "").strip()
+    if not requested:
+        return "", ""  # nothing posted: no workspace and nothing to report
+    from src.tool_security import owner_is_admin_or_single_user
+    if not owner_is_admin_or_single_user(get_current_user(request)):
+        return "", ""  # unprivileged caller: dropped before any filesystem check
+    from src.tool_execution import vet_workspace
+    workspace = vet_workspace(requested) or ""  # falsy result means the path was refused
+    return workspace, (requested if not workspace else "")  # exactly one element is non-empty
+
+
 def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
    if not session_url or not endpoint_base:
        return False
@@ -400,6 +427,7 @@ def setup_chat_routes(
            temperature=ctx.preset.temperature,
            max_tokens=ctx.preset.max_tokens,
            prompt_type=preset_id,
+            session_id=session,
        )
        _clean_reply, _clean_md = clean_thinking_for_save(reply, {"model": sess.model})
        sess.add_message(ChatMessage("assistant", _clean_reply, metadata=_clean_md))
@@ -446,8 +474,11 @@ def setup_chat_routes(
        use_research = form_data.get("use_research")
        time_filter = form_data.get("time_filter")
        preset_id = form_data.get("preset_id")
-        allow_bash = form_data.get("allow_bash")
-        allow_web_search = form_data.get("allow_web_search")
+        # Issue #3229: API callers send JSON, not FormData.  Read from the
+        # JSON body as fallback so callers who send {"allow_bash": true}
+        # actually get bash enabled.
+        allow_bash = form_data.get("allow_bash") or (body or {}).get("allow_bash")
+        allow_web_search = form_data.get("allow_web_search") or (body or {}).get("allow_web_search")
        use_rag = form_data.get("use_rag")
        search_context = form_data.get("search_context")  # pre-fetched web search results (compare mode)
        compare_mode = str(form_data.get("compare_mode", "")).lower() == "true"
@@ -456,7 +487,10 @@ def setup_chat_routes(
        # manual form posts that still send plan_mode=true.
        plan_mode = False
        chat_mode = str(form_data.get("mode", "")).lower()  # 'chat' or 'agent'
-        workspace = ""
+        # Workspace: confine the agent's file/shell tools to this folder.
+        workspace, workspace_rejected = _resolve_request_workspace(
+            request, form_data.get("workspace")
+        )
        # Plan mode is a modifier on agent mode — it only makes sense with tools.
        if plan_mode:
            chat_mode = "agent"
@@ -707,7 +741,7 @@ def setup_chat_routes(
            # leak a doc that belongs to a DIFFERENT session.
            if not active_doc:
                try:
-                    from src.tool_implementations import get_active_document
+                    from src.agent_tools.document_tools import get_active_document
                    _mem_id = get_active_document()
                    if _mem_id:
                        _mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
@@ -728,9 +762,18 @@ def setup_chat_routes(

        # Build disabled-tools set from frontend toggles + user privileges
        disabled_tools = set()
-        if str(allow_bash).lower() != "true":
+        # Only disable bash/web_search when the caller *explicitly* set them
+        # to a falsy value.  When unset (None), defer to per-user privilege
+        # checks below — this lets admins with can_use_bash=True use bash
+        # by default without having to send allow_bash in every request.
+        if allow_bash is not None and str(allow_bash).lower() != "true":
            disabled_tools.add("bash")
-        if str(allow_web_search).lower() != "true":
+        _explicit_web_intent = bool(_tool_intent and _tool_intent.category == "web")
+        if (
+            allow_web_search is not None
+            and str(allow_web_search).lower() != "true"
+            and not _explicit_web_intent
+        ):
            disabled_tools.add("web_search")
            disabled_tools.add("web_fetch")

@@ -848,6 +891,13 @@ def setup_chat_routes(
            # Register active stream for partial-save safety net
            _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode}

+            # The client sent a workspace the server refused to bind (deleted
+            # folder, file path, sensitive dir, filesystem root). Tell it up
+            # front so the UI can clear the pill instead of displaying a
+            # confinement that is not actually in effect.
+            if workspace_rejected:
+                yield f"data: {json.dumps({'type': 'workspace_rejected', 'data': {'path': workspace_rejected}})}\n\n"
+
            if ctx.preprocessed.attachment_meta:
                yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n"

@@ -1076,6 +1126,7 @@ def setup_chat_routes(
                        max_tokens=ctx.preset.max_tokens,
                        prompt_type=preset_id,
                        tools=None,
+                        session_id=session,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                            try:
@@ -1223,9 +1274,9 @@ def setup_chat_routes(
                        tool_policy=tool_policy,
                        owner=_user,
                        fallbacks=_fallback_candidates,
-                        workspace=None,
                        plan_mode=plan_mode,
                        approved_plan=approved_plan or None,
+                        workspace=workspace or None,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                            try:
@@ -18,6 +18,7 @@ from fastapi.responses import StreamingResponse
 from src.auth_helpers import require_authenticated_request, require_user
 from src.tool_implementations import do_manage_notes
 from src.constants import COOKBOOK_STATE_FILE
+from routes._validators import validate_remote_host, validate_ssh_port


 COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
@@ -36,6 +37,21 @@ DOCS_WRITE_SCOPES = {"documents:write"}
 WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"}


+def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
+    """Resolve a cookbook task's stored SSH target into ``(host, port_flag)``.
+
+    ``host`` is ``""`` for a local task. ``remoteHost`` / ``sshPort`` come from
+    cookbook_state.json and get interpolated into an ``ssh`` command string, so
+    they are validated first; a value with shell metacharacters raises a 400
+    HTTPException instead of being injected. Values are coerced with ``str()``
+    because JSON may hold ``sshPort`` as a number, which has no ``.strip()``.
+    """
+    host = validate_remote_host(str(task.get("remoteHost") or "").strip() or None) or ""
+    ssh_port = validate_ssh_port(str(task.get("sshPort") or "").strip() or None) or ""
+    port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
+    return host, port_flag
+
+
 async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
    """Run an existing route handler with request.state.current_user temporarily
    set to ``owner`` so its internal get_current_user/require_user calls see
@@ -550,8 +566,7 @@ def setup_codex_routes(
        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
        if task is None:
            raise HTTPException(404, "task not found")
-        host = (task.get("remoteHost") or "").strip()
-        ssh_port = (task.get("sshPort") or "").strip()
+        host, port_flag = _ssh_prefix_for_task(task)
        # Prefer the persisted log file over the tmux pane. The pane gets
        # overwritten by the post-crash neofetch banner + bash prompt the
        # moment vllm exits; the log file is the raw stdout/stderr and
@@ -563,7 +578,6 @@ def setup_codex_routes(
            f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
        )
        if host:
-            port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
            import shlex
            cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
        else:
@@ -625,10 +639,8 @@ def setup_codex_routes(
        state = _read_cookbook_state()
        tasks = state.get("tasks") or []
        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
-        host = ((task or {}).get("remoteHost") or "").strip()
-        ssh_port = ((task or {}).get("sshPort") or "").strip()
+        host, port_flag = _ssh_prefix_for_task(task or {})
        if host:
-            port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
            cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
        else:
            cmd = f"tmux kill-session -t {session_id}"
@@ -45,10 +45,14 @@ def _save_settings(settings):
 def _get_carddav_config():
    import os
    settings = _load_settings()
+    password = settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", ""))
+    if password and "carddav_password" in settings:
+        from src.secret_storage import decrypt
+        password = decrypt(password)
    return {
        "url": settings.get("carddav_url", os.environ.get("CARDDAV_URL", "")),
        "username": settings.get("carddav_username", os.environ.get("CARDDAV_USERNAME", "")),
-        "password": settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", "")),
+        "password": password,
    }


@@ -769,8 +773,11 @@ def setup_contacts_routes():
    @router.post("/import")
    async def import_vcf(data: dict, _admin: str = Depends(require_admin)):
        """Import contacts from .vcf or CSV. Body: {"vcf": "..."} or {"csv": "..."}."""
-        text = data.get("vcf") or data.get("text") or ""
-        csv_text = data.get("csv") or ""
+        # Coerce defensively: a non-string vcf/text/csv (e.g. a number or list
+        # in the JSON body) would otherwise reach .strip() and 500 with an
+        # AttributeError instead of degrading to a clean "no data" response.
+        text = str(data.get("vcf") or data.get("text") or "")
+        csv_text = str(data.get("csv") or "")
        if text.strip():
            if "BEGIN:VCARD" not in text.upper():
                return {"success": False, "error": "No vCard data found"}
@@ -822,7 +829,11 @@ def setup_contacts_routes():
                    except ValueError as e:
                        raise HTTPException(400, str(e))
                else:
-                    settings[key] = data[key]
+                    value = data[key]
+                    if key == "carddav_password" and value:
+                        from src.secret_storage import encrypt
+                        value = encrypt(value)
+                    settings[key] = value
        _save_settings(settings)
        # Force re-fetch
        _contact_cache["fetched_at"] = None
@@ -1,16 +1,19 @@
 """cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
 Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""

+import json
 import logging
 import ntpath
 import os
 import posixpath
 import re
 import shlex
+from pathlib import Path

 from fastapi import HTTPException
 from pydantic import BaseModel

+from routes._validators import validate_remote_host, validate_ssh_port
 from core.platform_compat import _ssh_exec_argv

 logger = logging.getLogger(__name__)
@@ -30,21 +33,24 @@ _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
 _OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
 # Include pattern is a glob: allow typical safe glyphs only.
 _INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
-# Remote host: either `user@host` or plain `host` (alias is allowed), where host
-# is a safe DNS-like token or a short SSH config alias.
-_REMOTE_HOST_RE = re.compile(r"^(?:[A-Za-z0-9._-]+@)?[A-Za-z0-9._-]+$")
 # HF tokens and API tokens are url-safe base64-like.
 _TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
 # Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
 # Anything beyond plain alphanumerics + dash + underscore could break out
 # of the shell/PowerShell contexts the value lands in.
 _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
-_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
 _GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
 # A download target directory. Absolute or ~-relative path; safe path glyphs
-# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
-# command. A leading ~ is expanded to $HOME at command-build time.
-_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
+# only (no quotes or shell metacharacters). Spaces are allowed because command
+# builders pass the value through quoted shell/Python contexts. The character
+# class uses ``\w`` — Unicode word characters under Python 3's default str
+# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
+# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
+# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
+# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
+# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
+_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
+_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
 _WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")


@@ -78,14 +84,6 @@ def _validate_include(v: str | None) -> str | None:
    return v


-def _validate_remote_host(v: str | None) -> str | None:
-    if v is None or v == "":
-        return None
-    if not _REMOTE_HOST_RE.match(v):
-        raise HTTPException(400, "Invalid remote_host — must be host or user@host, no SSH option syntax")
-    return v
-
-
 def _validate_token(v: str | None) -> str | None:
    if v is None or v == "":
        return None
@@ -94,26 +92,43 @@ def _validate_token(v: str | None) -> str | None:
    return v


+def load_stored_hf_token(*, state_path: Path | str | None = None) -> str:
+    """Return the decrypted HF token from cookbook_state.json, else env fallback."""
+    path = Path(state_path) if state_path else Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
+    token = ""  # stays empty unless the state file yields a token
+    if path.exists():
+        try:
+            state = json.loads(path.read_text(encoding="utf-8"))
+            env = state.get("env") if isinstance(state, dict) else {}  # tolerate a non-object top level
+            if isinstance(env, dict) and env.get("hfToken"):
+                from src.secret_storage import decrypt
+                token = decrypt(env.get("hfToken") or "")
+        except Exception:  # unreadable file, bad JSON or failed decrypt: use the env fallback
+            token = ""
+    if not token:
+        # HF_TOKEN takes precedence over HUGGING_FACE_HUB_TOKEN; only the env value is stripped.
+        token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or "").strip()
+    return token
+
+
 def _validate_local_dir(v: str | None) -> str | None:
     if v is None or v == "":
         return None
+    if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
+        v = v[1:-1]
     v = v.rstrip("/") or "/"
-    if not _LOCAL_DIR_RE.match(v):
-        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
+    # fullmatch, not match: "$" also matches just before a trailing newline, so
+    # match() would accept "/models\n" and let the newline into a shell command.
+    if not (_LOCAL_DIR_RE.fullmatch(v) or _WINDOWS_LOCAL_DIR_RE.fullmatch(v)):
+        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
+    # Reject path segments that start with '-' (option injection). '-' is in the
+    # allowlist, so ``/models/-rf`` or ``D:\models\-rf`` could be read as a CLI
+    # flag by hf/etc., and quoting does NOT stop a value from being parsed as
+    # an option, so the guard lives here rather than relying on consumers.
+    if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
+        raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
     return v


-def _validate_ssh_port(v: str | None) -> str | None:
-    if v is None or v == "":
-        return None
-    if not _SSH_PORT_RE.fullmatch(str(v)):
-        raise HTTPException(400, "Invalid ssh_port")
-    port = int(v)
-    if port < 1 or port > 65535:
-        raise HTTPException(400, "Invalid ssh_port")
-    return str(port)
-
-
 def _validate_gpus(v: str | None) -> str | None:
    if v is None or v == "":
        return None
@@ -125,7 +140,7 @@ def _validate_gpus(v: str | None) -> str | None:
 def _shell_path(p: str) -> str:
    """Render a validated path for a double-quoted shell context, expanding a
    leading ~ to $HOME (single quotes wouldn't expand it). Safe because
-    _validate_local_dir already restricts the charset."""
+    _validate_local_dir already rejects quotes and shell metacharacters."""
    if p == "~":
        return '"$HOME"'
    if p.startswith("~/"):
@@ -347,7 +362,12 @@ def _user_shell_path_bootstrap() -> list[str]:
        '  ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"',
        '  if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
        'fi',
-        'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
+        # Windows can expose python3 as a Microsoft Store App Execution Alias
+        # under WindowsApps. Git Bash sees that stub as present, but it exits
+        # before running Python. A Windows venv usually has python.exe, not
+        # python3.exe, so treat a missing or WindowsApps python3 as absent.
+        '_odys_py3="$(command -v python3 2>/dev/null || true)"',
+        'case "$_odys_py3" in ""|*[Ww]indows[Aa]pps*) python3() { python "$@"; } ;; esac',
        'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
    ]

@@ -386,6 +406,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
        "    for root, dirs, fns in safe_walk(base):",
        "        for fn in sorted(fns):",
        "            if not fn.lower().endswith('.gguf'): continue",
+        "            if fn.startswith('._'): continue  # macOS AppleDouble sidecar, not a real GGUF",
        "            fp = os.path.join(root, fn)",
        "            try: size = os.path.getsize(fp)",
        "            except Exception: size = 0",
@@ -557,6 +578,36 @@ _GGUF_PRELUDE_RE = re.compile(
 _OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
 _OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
 _OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
+_LLAMA_CPP_PYTHON_GGML_TYPES = {
+    "f32": "0",
+    "f16": "1",
+    "q4_0": "2",
+    "q4_1": "3",
+    "q5_0": "6",
+    "q5_1": "7",
+    "q8_0": "8",
+    "q8_1": "9",
+    "q2_k": "10",
+    "q3_k": "11",
+    "q4_k": "12",
+    "q5_k": "13",
+    "q6_k": "14",
+    "q8_k": "15",
+    "iq2_xxs": "16",
+    "iq2_xs": "17",
+    "iq3_xxs": "18",
+    "iq1_s": "19",
+    "iq4_nl": "20",
+    "iq3_s": "21",
+    "iq2_s": "22",
+    "iq4_xs": "23",
+    "mxfp4": "39",
+    "nvfp4": "40",
+    "q1_0": "41",
+}
+_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile(
+    r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)"
+)


 def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
@@ -588,6 +639,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -
    return f"[{host}]" if bracketed_host else host, port


+def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None:
+    """Rewrite ``--type_k`` / ``--type_v`` type names in a llama_cpp.server command as enum integers."""
+    if not cmd or "llama_cpp.server" not in cmd:
+        return cmd  # empty/None, or not a llama-cpp-python server command
+
+    def _swap_type_name(hit: re.Match[str]) -> str:
+        enum_id = _LLAMA_CPP_PYTHON_GGML_TYPES.get(hit["value"].lower())
+        if not enum_id:
+            return hit[0]  # name not in the table (e.g. already numeric): leave untouched
+        flag, sep, quote = hit.group("flag", "sep", "quote")
+        # Keep the original separator and quoting; only the value changes.
+        return f"{flag}{sep}{quote}{enum_id}{quote}"
+
+    return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(_swap_type_name, cmd)
+
+
 def _check_serve_binary(seg: str) -> None:
    """Validate that a single command segment starts with an allowlisted binary
    (after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
@@ -726,6 +793,7 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    runner_lines.append('    done')
    # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
    # or HIP attempt) doesn't cause the next configure to reuse stale settings.
+    runner_lines.append('    mkdir -p ~/bin')
    runner_lines.append('    cd ~/llama.cpp && rm -rf build')
    runner_lines.append('    if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
    runner_lines.append('      if command -v hipconfig &>/dev/null; then')
@@ -1030,6 +1098,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
            "vLLM is not installed or not in PATH on this server.",
            [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
        ),
+        (
+            r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
+            r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
+            r"Please ensure sgl_kernel is properly installed",
+            "SGLang native dependencies are missing on this server.",
+            [
+                {"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
+                {"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
+            ],
+        ),
        (
            r"sglang.*command not found|No module named sglang|SGLang is not installed",
            "SGLang is not installed or not in PATH on this server.",
@@ -0,0 +1,75 @@
+"""Pure helpers for shaping cookbook task output for the status response.
+
+Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
+unit-tested without standing up the whole app.
+"""
+
+import re
+
+_FETCHING_ZERO_FILES_RE = re.compile(r"Fetching\s+0\s+files", re.IGNORECASE)
+
+# Probe scripts for the dead-session download check, run as
+# `python3 -c <PROBE> <repo_id> <cache_root>` (locally or over SSH).
+# cache_root is the task's custom download dir, '' for the default HF cache.
+# It has to be passed explicitly: the download runner exports
+# HF_HOME=<local_dir>, so that task's cache lives under <local_dir>/hub, and
+# the probe process's own environment knows nothing about it.
+HF_CACHE_COMPLETE_PROBE = (
+    "import os,sys;"
+    "repo=sys.argv[1];"
+    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
+    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
+    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
+    "snap=os.path.join(d,'snapshots');"
+    "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
+    "inc=False;"
+    "blobs=os.path.join(d,'blobs');"
+    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
+    "sys.exit(0 if ok and not inc else 1)"
+)
+
+HF_CACHE_INCOMPLETE_PROBE = (
+    "import os,sys;"
+    "repo=sys.argv[1];"
+    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
+    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
+    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
+    "blobs=os.path.join(d,'blobs');"
+    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
+    "sys.exit(0 if inc else 1)"
+)
+
+
+def classify_dead_download(full_snapshot: str):
+    """Classify a dead download session from the markers its runner printed.
+
+    Per the runner contract, DOWNLOAD_OK is printed only after exit code 0 and
+    DOWNLOAD_FAILED otherwise, so both outlive the tmux pane. Returns
+    ``(status, zero_files)``; ``None`` (empty snapshot or no marker) tells the
+    caller to fall back to the cache probe. DOWNLOAD_OK wins, as in the
+    live-session branch, except that a "Fetching 0 files" run is an error:
+    nothing matched the include/quant pattern.
+    """
+    snapshot = full_snapshot or ""
+    succeeded = "DOWNLOAD_OK" in snapshot
+    if succeeded and _FETCHING_ZERO_FILES_RE.search(snapshot):
+        return ("error", True)
+    if succeeded:
+        return ("completed", False)
+    if "DOWNLOAD_FAILED" in snapshot:
+        return ("error", False)
+    return None
+
+
+def error_aware_output_tail(full_snapshot: str, status: str) -> str:
+    """Trim a task log down to the tail shown in the status response.
+
+    A task whose status is "error" keeps its last 50 lines, enough for the
+    "Copy last 50 lines" action to carry stack traces and build output. Every
+    other status keeps only 12 lines so the payload stays small on the 10s
+    polling interval. An empty snapshot yields "".
+    """
+    if full_snapshot:
+        keep = 12 if status != "error" else 50
+        return "\n".join(full_snapshot.splitlines()[-keep:])
+    return ""
@@ -15,9 +15,11 @@ from pathlib import Path
 from fastapi import APIRouter, HTTPException, Request, Depends

 from src.auth_helpers import require_user
+from src.constants import COOKBOOK_STATE_FILE
 from pydantic import BaseModel

 from core.middleware import require_admin
+from routes._validators import validate_remote_host, validate_ssh_port
 from core.platform_compat import (
    IS_WINDOWS,
    detached_popen_kwargs,
@@ -28,18 +30,26 @@ from core.platform_compat import (
    which_tool,
 )
 from routes.shell_routes import TMUX_LOG_DIR
+from routes.cookbook_output import (
+    error_aware_output_tail, classify_dead_download,
+    HF_CACHE_COMPLETE_PROBE, HF_CACHE_INCOMPLETE_PROBE,
+)

 logger = logging.getLogger(__name__)

 from routes.cookbook_helpers import (
-    _SSH_PORT_RE, _REMOTE_HOST_RE, _SESSION_ID_RE,
-    _validate_repo_id, _validate_serve_model_id, _validate_include, _validate_remote_host, _validate_token,
-    _validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path,
+    _SESSION_ID_RE, _validate_repo_id, _validate_serve_model_id, _validate_include, _validate_token,
+    _validate_local_dir, _validate_gpus, _shell_path,
    _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
    _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
    _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
+    load_stored_hf_token,
+    _append_vllm_linux_preflight_lines,
+    _diagnose_serve_output,
+    run_ssh_command_async,
    _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
    _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
+    _normalize_llama_cpp_python_cache_types,
    ModelDownloadRequest, ServeRequest,
 )

@@ -48,13 +58,13 @@ _HF_TOKEN_STATUS_SNIPPET = (
    'echo "[odysseus] HF token: applied"; '
    'else '
    'echo "[odysseus] HF token: NOT SET — gated/private models will be denied. '
-    'Add one in Odysseus Settings -> Cookbook -> HuggingFace Token."; '
+    'Add one in Odysseus Cookbook -> Settings -> HuggingFace Token."; '
    'fi'
 )

 def setup_cookbook_routes() -> APIRouter:
    router = APIRouter(tags=["cookbook"])
-    _cookbook_state_path = Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
+    _cookbook_state_path = Path(COOKBOOK_STATE_FILE)

    def _mask_secret(value: str) -> str:
        if not value:
@@ -164,6 +174,16 @@ def setup_cookbook_routes() -> APIRouter:
                "vLLM is not installed or not in PATH on this server.",
                [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
            ),
+            (
+                r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
+                r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
+                r"Please ensure sgl_kernel is properly installed",
+                "SGLang native dependencies are missing on this server.",
+                [
+                    {"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
+                    {"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
+                ],
+            ),
            (
                r"sglang.*command not found|No module named sglang|SGLang is not installed",
                "SGLang is not installed or not in PATH on this server.",
@@ -232,14 +252,7 @@ def setup_cookbook_routes() -> APIRouter:
        return state

    def _load_stored_hf_token() -> str:
-        if not _cookbook_state_path.exists():
-            return ""
-        try:
-            state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
-            env = state.get("env") if isinstance(state, dict) else {}
-            return _decrypt_secret(env.get("hfToken") if isinstance(env, dict) else "")
-        except Exception:
-            return ""
+        return load_stored_hf_token(state_path=_cookbook_state_path)

    def _cookbook_ssh_dir() -> Path:
        # The Docker image keeps cookbook keys under /app/.ssh; that path only
@@ -354,7 +367,11 @@ def setup_cookbook_routes() -> APIRouter:
            # all output to the log the poller reads. Paths handed to bash use
            # POSIX form + shell-quoting so drive paths / spaces survive.
            inner = TMUX_LOG_DIR / f"{session_id}_run.sh"
-            inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8")
+            pp = shlex.quote(pid_path.as_posix())
+            inner.write_text(
+                f"printf '%s\\n' \"$$\" > {pp}\n" + "\n".join(bash_lines) + "\n",
+                encoding="utf-8",
+            )
            lp = shlex.quote(log_path.as_posix())
            ip = shlex.quote(inner.as_posix())
            script_path = TMUX_LOG_DIR / f"{session_id}.sh"
@@ -406,8 +423,8 @@ def setup_cookbook_routes() -> APIRouter:
        else:
            _validate_repo_id(req.repo_id)
            _validate_include(req.include)
-        _validate_remote_host(req.remote_host)
-        req.ssh_port = _validate_ssh_port(req.ssh_port)
+        validate_remote_host(req.remote_host)
+        req.ssh_port = validate_ssh_port(req.ssh_port)
        req.local_dir = _validate_local_dir(req.local_dir)
        req.hf_token = "" if is_ollama_download else (req.hf_token or _load_stored_hf_token())
        _validate_token(req.hf_token)
@@ -738,9 +755,8 @@ def setup_cookbook_routes() -> APIRouter:
        # Validate shell-bound inputs, matching the sibling list_gpus endpoint —
        # `host`/`ssh_port` are interpolated into an ssh command below, so an
        # unvalidated value (e.g. "x'; rm -rf ~ #") would be command injection.
-        host = _validate_remote_host(host)
-        if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
-            raise HTTPException(400, "Invalid ssh_port")
+        host = validate_remote_host(host)
+        ssh_port = validate_ssh_port(ssh_port)
        TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)

        model_dirs = []
@@ -889,11 +905,16 @@ def setup_cookbook_routes() -> APIRouter:
            # listening" check without requiring ss/netstat/nmap.
            ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
            if ssh_port and str(ssh_port) != "22":
-                if not _SSH_PORT_RE.match(str(ssh_port)):
+                try:
+                    ssh_port = validate_ssh_port(ssh_port)
+                except HTTPException:
                    return None
                ssh_base.extend(["-p", str(ssh_port)])
-            host_arg = remote
-            if not _REMOTE_HOST_RE.match(host_arg):
+            try:
+                host_arg = validate_remote_host(remote)
+            except HTTPException:
+                return None
+            if not host_arg:
                return None
            probe_ports = " ".join(str(start_port + i) for i in range(max_offset + 1))
            script = (
@@ -1196,8 +1217,8 @@ def setup_cookbook_routes() -> APIRouter:
        """
        require_admin(request)
        # Defence-in-depth: reject values that could break out of shell contexts.
-        _validate_remote_host(req.remote_host)
-        req.ssh_port = _validate_ssh_port(req.ssh_port)
+        validate_remote_host(req.remote_host)
+        req.ssh_port = validate_ssh_port(req.ssh_port)
        req.gpus = _validate_gpus(req.gpus)
        req.hf_token = req.hf_token or _load_stored_hf_token()
        _validate_token(req.hf_token)
@@ -1208,6 +1229,7 @@ def setup_cookbook_routes() -> APIRouter:
        # many downstream `"engine" in req.cmd` membership checks can't hit
        # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
        req.cmd = _validate_serve_cmd(req.cmd) or ""
+        req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
        req.cmd = _venv_safe_local_pip_install_cmd(
            req.cmd,
            local=not bool(req.remote_host),
@@ -1637,12 +1659,11 @@ def setup_cookbook_routes() -> APIRouter:
    async def server_setup(request: Request, req: SetupRequest):
        """Install required dependencies on a remote server via SSH."""
        require_admin(request)
-        host = _validate_remote_host(req.host)
+        host = validate_remote_host(req.host)
        if not host:
            raise HTTPException(400, "host is required")
        port = req.ssh_port
-        if port is not None and port != "" and not re.fullmatch(r"\d{1,5}", port):
-            raise HTTPException(400, "Invalid ssh_port")
+        port = validate_ssh_port(port)
        pf = f"-p {port} " if port and port != "22" else ""

        # Detect platform: Windows first (echo %OS% → Windows_NT), then Termux, then Linux
@@ -1886,9 +1907,8 @@ def setup_cookbook_routes() -> APIRouter:
        `busy` is True when free_mb/total_mb < 0.5.
        """
        require_admin(request)
-        host = _validate_remote_host(host)
-        if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
-            raise HTTPException(400, "Invalid ssh_port")
+        host = validate_remote_host(host)
+        ssh_port = validate_ssh_port(ssh_port)
        gpu_query = "nvidia-smi --query-gpu=index,name,memory.free,memory.total,memory.used,utilization.gpu,uuid --format=csv,noheader,nounits"
        nvidia_error = None
        try:
@@ -2045,9 +2065,8 @@ def setup_cookbook_routes() -> APIRouter:
        sig = (req.signal or "TERM").upper()
        if sig not in ("TERM", "KILL", "INT"):
            raise HTTPException(400, "signal must be TERM, KILL, or INT")
-        host = _validate_remote_host(req.host)
-        if req.ssh_port and not _SSH_PORT_RE.fullmatch(req.ssh_port):
-            raise HTTPException(400, "Invalid ssh_port")
+        host = validate_remote_host(req.host)
+        req.ssh_port = validate_ssh_port(req.ssh_port)
        kill_cmd = f"kill -{sig} {req.pid}"
        try:
            if host:
@@ -2381,14 +2400,19 @@ def setup_cookbook_routes() -> APIRouter:
            host = (srv.get("host") or "").strip()
            if not host:
                continue  # local-only entry; the /proc scan handles it
-            if not _REMOTE_HOST_RE.match(host):
+            try:
+                host = validate_remote_host(host)
+            except HTTPException:
                continue
            sport = str(srv.get("port") or "").strip()
            ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
            if sport and sport != "22":
-                if not _SSH_PORT_RE.match(sport):
+                try:
+                    sport = validate_ssh_port(sport)
+                except HTTPException:
                    continue
-                ssh_base.extend(["-p", sport])
+                if sport != "22":
+                    ssh_base.extend(["-p", sport])

            try:
                ls = subprocess.run(
@@ -2802,30 +2826,20 @@ def setup_cookbook_routes() -> APIRouter:
    def _cookbook_tasks_status_sync():
        import subprocess

-        def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
+        def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
            """Best-effort check for a completed HF cache entry.

            tmux output can stop at a stale progress line if the pane/session
            disappears before Cookbook captures the final DOWNLOAD_OK marker.
            In that case, trust the cache shape: a snapshot directory with files
            and no *.incomplete blobs means HuggingFace finished materializing the
-            model.
+            model. cache_root is the task's custom download dir — the runner
+            pointed HF_HOME there, so the cache lives under <cache_root>/hub,
+            not wherever this probe's environment says.
            """
            if not repo_id or "/" not in repo_id:
                return False
-            py = (
-                "import os,sys;"
-                "repo=sys.argv[1];"
-                "base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
-                "d=os.path.join(base,'models--'+repo.replace('/','--'));"
-                "snap=os.path.join(d,'snapshots');"
-                "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
-                "inc=False;"
-                "blobs=os.path.join(d,'blobs');"
-                "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
-                "sys.exit(0 if ok and not inc else 1)"
-            )
-            cmd = ["python3", "-c", py, repo_id]
+            cmd = ["python3", "-c", HF_CACHE_COMPLETE_PROBE, repo_id, cache_root or ""]
            try:
                if remote_host:
                    ssh_base = ["ssh"]
@@ -2839,7 +2853,7 @@ def setup_cookbook_routes() -> APIRouter:
            except Exception:
                return False

-        def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
+        def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
            """Best-effort check for resumable HF partial blobs.

            A lost SSH/tmux session can leave a real download still incomplete.
@@ -2848,16 +2862,7 @@ def setup_cookbook_routes() -> APIRouter:
            """
            if not repo_id or "/" not in repo_id:
                return False
-            py = (
-                "import os,sys;"
-                "repo=sys.argv[1];"
-                "base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
-                "d=os.path.join(base,'models--'+repo.replace('/','--'));"
-                "blobs=os.path.join(d,'blobs');"
-                "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
-                "sys.exit(0 if inc else 1)"
-            )
-            cmd = ["python3", "-c", py, repo_id]
+            cmd = ["python3", "-c", HF_CACHE_INCOMPLETE_PROBE, repo_id, cache_root or ""]
            try:
                if remote_host:
                    ssh_base = ["ssh"]
@@ -2929,12 +2934,18 @@ def setup_cookbook_routes() -> APIRouter:
            if not _SESSION_ID_RE.match(session_id):
                logger.warning(f"Skipping task with unsafe session_id: {session_id!r}")
                continue
-            if remote and not _REMOTE_HOST_RE.match(remote):
-                logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
-                continue
-            if _tport and not _SSH_PORT_RE.match(str(_tport)):
-                logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
-                continue
+            if remote:
+                try:
+                    remote = validate_remote_host(remote)
+                except HTTPException:
+                    logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
+                    continue
+            if _tport:
+                try:
+                    _tport = validate_ssh_port(str(_tport))
+                except HTTPException:
+                    logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
+                    continue
            if task_platform == "windows" and remote:
                # Windows: check PID file + Get-Process, read log tail
                sd = "$env:TEMP\\odysseus-sessions"
@@ -3047,6 +3058,7 @@ def setup_cookbook_routes() -> APIRouter:
            # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
            # when the PID is gone instead of blindly reporting "stopped".
            download_zero_files = False
+            exit_code = None
            status = "unknown"
            download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
            download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
@@ -3055,7 +3067,7 @@ def setup_cookbook_routes() -> APIRouter:
                and (
                    ".incomplete" in full_snapshot
                    or bool(re.search(r'model-\d+-of-\d+\.[A-Za-z0-9_.-]+:\s+(?:[0-9]|[1-8][0-9])%', full_snapshot))
-                    or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""))
+                    or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
                )
            )
            if is_alive or (local_win_task and full_snapshot):
@@ -3096,11 +3108,19 @@ def setup_cookbook_routes() -> APIRouter:
                else:
                    status = "running"
            else:
-                # Session is dead — check if it completed or crashed
-                if (
+                # Session is dead — check if it completed or crashed. The
+                # runner markers in the retained output are conclusive
+                # (DOWNLOAD_OK only prints after exit 0), so check them before
+                # the cache probe, which can't see ollama pulls at all.
+                marker = classify_dead_download(full_snapshot) if task_type == "download" else None
+                if marker is not None:
+                    status, download_zero_files = marker
+                    if status == "completed" and not progress_text:
+                        progress_text = "Download complete"
+                elif (
                    task_type == "download"
                    and not download_has_incomplete_evidence
-                    and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""))
+                    and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
                ):
                    status = "completed"
                    if not progress_text:
@@ -3120,7 +3140,7 @@ def setup_cookbook_routes() -> APIRouter:
                status = "error"
            if download_zero_files:
                diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
-            output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
+            output_tail = error_aware_output_tail(full_snapshot, status)

            results.append({
                "session_id": session_id,
@@ -3131,6 +3151,7 @@ def setup_cookbook_routes() -> APIRouter:
                "phase": serve_phase,
                "diagnosis": diagnosis,
                "output_tail": output_tail,
+                "exit_code": exit_code,
                "cmd": _payload.get("_cmd") or "",
                "tps": phase_info.get("tps"),
                "reqs": phase_info.get("reqs"),
@@ -1,12 +1,13 @@
 """Diagnostics routes — /api/db/stats, /api/rag/stats, /api/test/youtube, /api/test-research."""

 import logging
+import os
 from typing import Dict, Any

 from fastapi import APIRouter, HTTPException, Form, Request

 from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
-from core.constants import DEFAULT_HOST
+from core.constants import DEFAULT_HOST, DATA_DIR
 from core.middleware import require_admin

 logger = logging.getLogger(__name__)
@@ -16,9 +17,42 @@ def setup_diagnostics_routes(
    rag_manager,
    rag_available: bool,
    research_handler,
+    memory_vector=None,
 ) -> APIRouter:
    router = APIRouter(tags=["diagnostics"])

+    @router.get("/api/diagnostics/services")
+    async def get_service_health(request: Request) -> Dict[str, Any]:
+        """Consolidated degraded-state report for ChromaDB, SearXNG, email,
+        ntfy, and provider endpoints. Non-intrusive probes — safe to poll."""
+        require_admin(request)  # gate first: nothing is probed before this check
+        from src.service_health import collect_service_health  # imported lazily, per call
+        return await collect_service_health(rag_manager, memory_vector)  # args come from the enclosing setup function
+
+    @router.get("/api/diagnostics/logs")
+    async def get_diagnostics_logs(request: Request, limit: int = 200) -> Dict[str, Any]:
+        """Return the last `limit` lines (clamped to 1..1000) of the app log.
+
+        Gated by require_admin. A missing log file yields an empty list, not
+        an error; any other failure is logged and surfaced as HTTP 500.
+        """
+        require_admin(request)
+        from collections import deque
+        limit = max(1, min(limit, 1000))
+        try:
+            log_file = os.path.join(DATA_DIR, "logs", "app.log")
+            # Stream through a bounded deque: only `limit` lines are retained,
+            # rather than loading the whole (up to 5MB, rotated) file.
+            with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
+                tail = deque(f, maxlen=limit)
+            return {"status": "success", "logs": [line.rstrip("\r\n") for line in tail]}
+        except FileNotFoundError:
+            # No log written yet (EAFP also closes the exists()/open() race).
+            return {"status": "success", "logs": []}
+        except Exception as e:
+            logger.error(f"Diagnostics logs retrieval error: {e}")
+            raise HTTPException(500, f"Failed to retrieve logs: {str(e)}")
+
    @router.get("/api/db/stats")
    async def get_database_stats(request: Request) -> Dict[str, Any]:
        require_admin(request)
@@ -108,10 +108,10 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            # to markdown for prose.
            language = req.language
            if not language:
-                from src.tool_implementations import _looks_like_email_document, _sniff_doc_language
+                from src.agent_tools.document_tools import _looks_like_email_document, _sniff_doc_language
                language = _sniff_doc_language(req.content)
            else:
-                from src.tool_implementations import _looks_like_email_document
+                from src.agent_tools.document_tools import _looks_like_email_document
            if _looks_like_email_document(req.content, req.title):
                language = "email"

@@ -643,7 +643,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                    # in-memory active-doc pointer so the last-resort injection
                    # path doesn't re-surface this doc in a later chat (#1160).
                    try:
-                        from src.tool_implementations import clear_active_document
+                        from src.agent_tools.document_tools import clear_active_document
                        clear_active_document(doc_id)
                    except Exception:
                        pass
@@ -672,7 +672,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            # Closed/deleted — drop the in-memory active-doc pointer so it isn't
            # re-injected into a later, unrelated chat (#1160).
            try:
-                from src.tool_implementations import clear_active_document
+                from src.agent_tools.document_tools import clear_active_document
                clear_active_document(doc_id)
            except Exception:
                pass
@@ -304,6 +304,7 @@ OWNER_SCOPED_EMAIL_CACHE_TABLES = {
    "email_ai_replies",
    "email_calendar_extractions",
    "email_urgency_alerts",
+    "sender_signatures",
 }


@@ -341,6 +342,55 @@ def _ensure_owner_scoped_email_cache_table(conn, table: str, create_sql: str, co
        _lg.getLogger(__name__).warning(f"{table} owner-migration skipped: {_mig_e}")


+def _ensure_sender_signatures_table(conn):
+    """Ensure sender_signatures exists with a (from_address, owner) primary key.
+
+    A table of any other shape is renamed aside, recreated, refilled from the
+    old rows (a missing/NULL owner becomes '') and dropped. Errors raised
+    during that migration are logged as a warning and swallowed.
+    """
+    create_sql = """
+        CREATE TABLE IF NOT EXISTS sender_signatures (
+            from_address TEXT,
+            owner TEXT DEFAULT '',
+            signature_text TEXT,
+            sample_count INTEGER,
+            last_built_at TEXT NOT NULL,
+            model_used TEXT,
+            source TEXT,
+            PRIMARY KEY (from_address, owner)
+        )
+    """
+    carried = ("from_address", "signature_text", "sample_count",
+               "last_built_at", "model_used", "source")
+    conn.execute(create_sql)
+    try:
+        schema = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
+        names = [row[1] for row in schema]
+        # table_info field 5 (pk) is the 1-based position in the primary key, 0 if none.
+        pk_rows = sorted((row for row in schema if row[5]), key=lambda row: row[5])
+        if "owner" in names and [row[1] for row in pk_rows] == ["from_address", "owner"]:
+            return
+
+        # Legacy shape: park the old table, rebuild, then copy the shared columns.
+        conn.execute("ALTER TABLE sender_signatures RENAME TO sender_signatures__old")
+        conn.execute(create_sql)
+        legacy = [row[1] for row in conn.execute("PRAGMA table_info(sender_signatures__old)").fetchall()]
+        copy_cols = [name for name in carried if name in legacy]
+        source_owner = "''" if "owner" not in legacy else "COALESCE(owner, '')"
+        # OR IGNORE skips rows that would violate a constraint of the new table.
+        conn.execute(
+            f"INSERT OR IGNORE INTO sender_signatures "
+            f"({', '.join([*copy_cols, 'owner'])}) "
+            f"SELECT {', '.join([*copy_cols, source_owner])} "
+            f"FROM sender_signatures__old"
+        )
+        conn.execute("DROP TABLE sender_signatures__old")
+    except Exception as migration_error:
+        import logging as _lg
+        _lg.getLogger(__name__).warning(f"sender_signatures owner-migration skipped: {migration_error}")
+
+
 def attachment_extract_dir(folder: str, uid: str) -> Path:
    """Containment-safe extraction directory for an attachment.

@@ -559,20 +609,10 @@ def _init_scheduled_db():
            conn.execute("ALTER TABLE email_boundaries ADD COLUMN turns_json TEXT")
    except Exception:
        pass
-    # Per-sender signature cache. Populated by `learn_sender_signatures`
-    # action: the LLM extracts the common trailing block across N emails
-    # from each sender; the renderer folds it consistently for every
-    # future email from that address.
-    conn.execute("""
-        CREATE TABLE IF NOT EXISTS sender_signatures (
-            from_address TEXT PRIMARY KEY,
-            signature_text TEXT,
-            sample_count INTEGER,
-            last_built_at TEXT NOT NULL,
-            model_used TEXT,
-            source TEXT
-        )
-    """)
+    # Per-sender signature cache. Populated by `learn_sender_signatures`.
+    # Message sender addresses are global, so signatures must be scoped to the
+    # mailbox owner before `/read` returns them to the renderer.
+    _ensure_sender_signatures_table(conn)
    conn.commit()
    conn.close()

@@ -762,10 +802,14 @@ def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int
    imaplib._MAXLINE = 50_000_000
    return conn

-def _imap_connect(account_id: str | None = None, owner: str = ""):
+def _imap_connect(account_id: str | None = None, owner: str = "",
+                  timeout: int = _IMAP_TIMEOUT_SECONDS):
    # SECURITY: passing `owner` scopes the fallback config lookup so a brand
    # new user doesn't get connected against another user's default mailbox
    # when they have no account configured.
+    #
+    # `timeout` is overridable so short-lived callers (e.g. the service-health
+    # probe) can impose a tighter budget than the default IMAP timeout.
    cfg = _get_email_config(account_id, owner=owner)
    # Connection mode:
    #   STARTTLS on → plain + upgrade
@@ -778,7 +822,7 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
        cfg["imap_host"],
        cfg["imap_port"],
        starttls=bool(cfg.get("imap_starttls")),
-        timeout=_IMAP_TIMEOUT_SECONDS,
+        timeout=timeout,
    )
    try:
        conn.login(cfg["imap_user"], cfg["imap_password"])
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
    return m.group(1).decode() if m else ""


+_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
+
+
+def _group_uid_fetch_records(msg_data) -> list:
+    """Fold a raw imaplib UID FETCH reply into one (meta, payload) per message.
+
+    The reply is a flat, interleaved list: attributes carrying a literal
+    (``RFC822.HEADER {n}`` etc.) come as ``(meta, literal)`` tuples, anything
+    sent outside a literal comes as bare ``bytes``. Placement is
+    server-specific: Dovecot emits FLAGS *before* the header literal (inside
+    the tuple meta), Gmail emits it *after*, as a bare
+    ``b' FLAGS (\\Seen))'`` element. Discarding bare elements would lose
+    FLAGS on Gmail and render every message as unread/unflagged.
+
+    A tuple whose meta begins with a sequence number starts a record. Any
+    other part (continuation tuple or bare bytes) is appended to the latest
+    record's meta, so attribute regexes match against the complete meta.
+    Bare ``b')'`` terminators are appended as well, which does no harm.
+    """
+    records: list = []  # each entry: (meta_bytes, payload_bytes_or_None)
+    for item in msg_data or []:
+        if isinstance(item, tuple):
+            head = item[0]
+            if not isinstance(head, (bytes, bytearray)):
+                head = str(head).encode()
+            if _FETCH_SEQ_RE.match(head):
+                records.append((head, item[1]))
+            elif records:
+                meta, payload = records.pop()
+                records.append((meta + b" " + head, payload or item[1]))
+        elif records and isinstance(item, (bytes, bytearray)):
+            meta, payload = records.pop()
+            records.append((meta + b" " + bytes(item), payload))
+    return records
+
+
 def _smtp_ready(cfg: dict) -> bool:
    return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))

@@ -799,20 +834,11 @@ def setup_email_routes():
                except Exception as e:
                    logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
                    status, msg_data = "NO", []
-                # imaplib batch responses interleave (meta, payload) tuples and
-                # `b')'` terminators. Group by message: each tuple where the
-                # meta begins with a seq number starts a new message record.
-                seq_re = re.compile(rb'^(\d+)\s+\(')
-                grouped = []  # list of (meta_str, payload_bytes)
-                for part in (msg_data or []):
-                    if isinstance(part, tuple):
-                        meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
-                        if seq_re.match(meta_b):
-                            grouped.append((meta_b, part[1]))
-                        elif grouped:
-                            # continuation of previous message — concatenate meta info if any
-                            cur_meta, cur_payload = grouped[-1]
-                            grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
+                # Group the batched response into per-message (meta, payload)
+                # records. Bare bytes parts must be kept: Gmail returns FLAGS
+                # after the header literal as a bare element, and dropping it
+                # rendered every Gmail message as unread/unflagged.
+                grouped = _group_uid_fetch_records(msg_data)

                if status != "OK" and not grouped:
                    conn.logout()
@@ -1061,7 +1087,10 @@ def setup_email_routes():
            return {"contacts": [], "error": "Mail operation failed"}

    @router.get("/search")
-    async def search_emails(
+    # Sync def: the body is blocking IMAP I/O with no awaits. As `async def` it ran
+    # directly on the event loop and stalled the whole app during a search; as a sync
+    # def FastAPI runs it in a threadpool, keeping the loop responsive.
+    def search_emails(
        q: str = Query(""),
        folder: str = Query("INBOX"),
        limit: int = Query(50),
@@ -1123,14 +1152,15 @@ def setup_email_routes():
                            continue
                        raw_header = None
                        flags = ""
-                        for part in msg_data:
-                            if isinstance(part, tuple):
-                                meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
-                                if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
-                                    raw_header = part[1]
-                                flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
-                                if flag_match:
-                                    flags = flag_match.group(1)
+                        # Same Gmail caveat as the list route: FLAGS may
+                        # arrive after the header literal, so group bare
+                        # parts back into the message meta before scanning.
+                        for meta_b, payload in _group_uid_fetch_records(msg_data):
+                            if payload and b"RFC822.HEADER" in meta_b:
+                                raw_header = payload
+                            flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
+                            if flag_match:
+                                flags = flag_match.group(1).decode(errors="replace")
                        if not raw_header:
                            continue
                        msg = email_mod.message_from_bytes(raw_header)
@@ -1279,8 +1309,9 @@ def setup_email_routes():
                try:
                    if sender_addr:
                        _rs = _c.execute(
-                            "SELECT signature_text FROM sender_signatures WHERE from_address = ?",
-                            (sender_addr.lower().strip(),),
+                            f"SELECT signature_text FROM sender_signatures "
+                            f"WHERE from_address = ? AND {owner_clause}",
+                            (sender_addr.lower().strip(), *owner_params),
                        ).fetchone()
                        if _rs and _rs[0]:
                            cached_sender_sig = _rs[0]
@@ -1756,7 +1787,9 @@ def setup_email_routes():
            return {"success": False, "error": "Mail operation failed"}

    @router.post("/archive/{uid}")
-    async def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
+    # Sync def: blocking IMAP I/O with no awaits — see search_emails above. Runs in a
+    # threadpool instead of blocking the event loop.
+    def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
        """Move email to Archive folder."""
        try:
            with _imap(account_id, owner=owner) as conn:
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional
 from pydantic import BaseModel

 from core.database import GalleryImage
+from src.auth_helpers import _auth_disabled

 logger = logging.getLogger(__name__)

@@ -120,19 +121,18 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any
    }


-def _owner_filter(q, user):
+def _owner_filter(q, user, model_cls=GalleryImage):
     """Apply owner filtering to a gallery query.
 
-    When auth is disabled (single-user mode) get_current_user returns None
-    and there is no per-user scoping. The main library list and stats already
-    treat None as "show everything" (`if user is not None`), so this helper
-    must too — otherwise the tag/model filter sidebars come back empty and the
-    tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
-    silently affect zero rows in the most common self-hosted deployment.
+    ``get_current_user`` returns None both in auth-disabled single-user mode
+    and when auth is enabled but no current user was resolved. Preserve the
+    single-user behavior, but fail closed for auth-enabled null-user states.
+
+    ``model_cls`` is the class whose ``owner`` attribute is compared with
+    ``user``; it defaults to GalleryImage.
     """
-    if user is None:
+    if user is not None:
+        return q.filter(model_cls.owner == user)
+    if _auth_disabled():
         return q
-    return q.filter(GalleryImage.owner == user)
+    # No user and auth is not disabled: fail closed with an always-false
+    # filter so the query matches no rows.
+    return q.filter(False)



@@ -19,6 +19,7 @@ from src.upload_limits import (
    GALLERY_TRANSFORM_UPLOAD_MAX_BYTES,
 )
 from src.constants import GENERATED_IMAGES_DIR
+from src.optional_deps import patch_realesrgan_torchvision_compat

 from routes.gallery_helpers import (
    GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
@@ -108,6 +109,32 @@ def _visible_image_endpoint_for_base(db, base: str, owner: str | None):
    return fallback


+async def _fetch_result_image_b64(url: str) -> Optional[str]:
+    """Fetch an image URL returned in an upstream response body, base64-encoded
+    (or None on a non-200).
+
+    The URL comes from the diffusion/OpenAI server's response, not from our own
+    config, so a malicious or compromised endpoint could otherwise steer this
+    fetch at an internal or cloud-metadata address. Validate it the same way the
+    client-supplied endpoint is validated before the first request.
+
+    Raises HTTPException(502) when ``check_outbound_url`` rejects the URL; the
+    rejection reason is included in the error detail.
+    """
+    import base64
+    import httpx
+    from src.url_safety import check_outbound_url
+
+    # block_private is passed as True only when IMAGE_BLOCK_PRIVATE_IPS is set
+    # to "true" (case-insensitive); with the variable unset it is False.
+    ok, reason = check_outbound_url(
+        url,
+        block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+    )
+    if not ok:
+        raise HTTPException(502, f"Upstream returned an unsafe image URL: {reason}")
+    async with httpx.AsyncClient(timeout=60) as c2:
+        ir = await c2.get(url)
+        if ir.status_code == 200:
+            return base64.b64encode(ir.content).decode()
+    # Any status other than 200 is reported to the caller as "no image".
+    return None
+
+
 def setup_gallery_routes() -> APIRouter:
    router = APIRouter(tags=["gallery"])

@@ -476,8 +503,7 @@ def setup_gallery_routes() -> APIRouter:
                .outerjoin(DbSession, GalleryImage.session_id == DbSession.id)
                .filter(GalleryImage.is_active == True)
            )
-            if user is not None:
-                q = q.filter(GalleryImage.owner == user)
+            q = _owner_filter(q, user)

            # Search filter (prompt + tags + ai_tags)
            if search:
@@ -579,28 +605,26 @@ def setup_gallery_routes() -> APIRouter:
        db = SessionLocal()
        try:
            q = db.query(GalleryAlbum)
-            if user:
-                q = q.filter(GalleryAlbum.owner == user)
+            q = _owner_filter(q, user, GalleryAlbum)
            albums = q.order_by(GalleryAlbum.created_at.desc()).all()
            result = []
            for a in albums:
                _count_q = db.query(GalleryImage).filter(
                    GalleryImage.album_id == a.id, GalleryImage.is_active == True
                )
-                if user:
-                    _count_q = _count_q.filter(GalleryImage.owner == user)
+                _count_q = _owner_filter(_count_q, user)
                count = _count_q.count()
                cover_url = None
                if a.cover_id:
-                    cover = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id).first()
+                    cover_q = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id)
+                    cover = _owner_filter(cover_q, user).first()
                    if cover:
                        cover_url = f"/api/generated-image/{cover.filename}"
                elif count > 0:
                    _cover_q = db.query(GalleryImage).filter(
                        GalleryImage.album_id == a.id, GalleryImage.is_active == True
                    )
-                    if user:
-                        _cover_q = _cover_q.filter(GalleryImage.owner == user)
+                    _cover_q = _owner_filter(_cover_q, user)
                    first = _cover_q.order_by(GalleryImage.created_at.desc()).first()
                    if first:
                        cover_url = f"/api/generated-image/{first.filename}"
@@ -643,10 +667,9 @@ def setup_gallery_routes() -> APIRouter:
            base = db.query(GalleryImage).filter(GalleryImage.is_active == True)
            size_q = db.query(func.sum(GalleryImage.file_size)).filter(GalleryImage.is_active == True)
            album_q = db.query(GalleryAlbum)
-            if user:
-                base = base.filter(GalleryImage.owner == user)
-                size_q = size_q.filter(GalleryImage.owner == user)
-                album_q = album_q.filter(GalleryAlbum.owner == user)
+            base = _owner_filter(base, user)
+            size_q = _owner_filter(size_q, user)
+            album_q = _owner_filter(album_q, user, GalleryAlbum)
            total = base.count()
            total_size = size_q.scalar() or 0
            fav_count = base.filter(GalleryImage.favorite == True).count()
@@ -674,8 +697,7 @@ def setup_gallery_routes() -> APIRouter:
                GalleryImage.is_active == True,
                (GalleryImage.ai_tags == None) | (GalleryImage.ai_tags == ""),
            )
-            if user:
-                q = q.filter(GalleryImage.owner == user)
+            q = _owner_filter(q, user)
            if album_id:
                q = q.filter(GalleryImage.album_id == album_id)
            untagged = q.count()
@@ -909,15 +931,23 @@ def setup_gallery_routes() -> APIRouter:
                raise HTTPException(404, "Image not found")

            img_filename = img.filename
-            # Remove the file from disk
-            img_path = _gallery_image_path(img_filename)
-            if img_path.exists():
-                img_path.unlink()
-
-            # Soft-delete the record
+            # Soft-delete the record first; the DB is the source of truth.
            img.is_active = False
            db.commit()

+            # Only after the soft-delete commit succeeds do we remove the file.
+            # If the file were deleted first and the commit then failed/rolled
+            # back, the still-active record would point at a missing file.
+            # Best-effort so a missing or locked file can't 500 a delete that
+            # already succeeded logically. Uses the path-confined resolver so a
+            # malformed stored filename can't escape generated_images.
+            try:
+                img_path = _gallery_image_path(img_filename)
+                if img_path.exists():
+                    img_path.unlink()
+            except Exception as e:
+                logger.warning(f"Could not remove gallery image file for {img_filename}: {e}")
+
            # Strip stale chat-history references so the image bubble
            # (and its prompt caption) doesn't come back after a server
            # reboot replays the session. We remove the matching tool
@@ -1147,10 +1177,7 @@ def setup_gallery_routes() -> APIRouter:
                        if item.get("b64_json"):
                            raw_b64 = item["b64_json"]
                        elif item.get("url"):
-                            async with httpx.AsyncClient(timeout=60) as c2:
-                                img_r = await c2.get(item["url"])
-                                if img_r.status_code == 200:
-                                    raw_b64 = base64.b64encode(img_r.content).decode()
+                            raw_b64 = await _fetch_result_image_b64(item["url"])
                    if not raw_b64:
                        raise HTTPException(502, "OpenAI returned no image")

@@ -1211,7 +1238,7 @@ def setup_gallery_routes() -> APIRouter:
        original and regenerates `strength` fraction. With strength ~0.4
        you get edge blending + lighting unification while keeping the
        composition recognisable."""
-        import httpx, base64 as _b64
+        import httpx
        user = require_privilege(request, "can_generate_images")
        body = await request.json()

@@ -1387,10 +1414,9 @@ def setup_gallery_routes() -> APIRouter:
                            if item.get("b64_json"):
                                return {"image": item["b64_json"]}
                            if item.get("url"):
-                                async with httpx.AsyncClient(timeout=60) as c2:
-                                    ir = await c2.get(item["url"])
-                                    if ir.status_code == 200:
-                                        return {"image": _b64.b64encode(ir.content).decode()}
+                                img_b64 = await _fetch_result_image_b64(item["url"])
+                                if img_b64:
+                                    return {"image": img_b64}
                    last_err = f"{path}: server returned no image"
                except httpx.ConnectError as e:
                    raise HTTPException(502, f"Can't reach diffusion server at {base}: {e}")
@@ -1450,6 +1476,7 @@ def setup_gallery_routes() -> APIRouter:
        img_bytes = base64.b64decode(image_b64)
        src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
        try:
+            patch_realesrgan_torchvision_compat()
            from realesrgan import RealESRGANer
        except ImportError:
            return {"error": "realesrgan not installed. Install it from Cookbook → Dependencies (search 'realesrgan')."}
@@ -1499,6 +1526,7 @@ def setup_gallery_routes() -> APIRouter:
        img_bytes = base64.b64decode(image_b64)
        src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
        try:
+            patch_realesrgan_torchvision_compat()
            from basicsr.archs.rrdbnet_arch import RRDBNet
            from realesrgan import RealESRGANer
        except ImportError:
@@ -1,7 +1,9 @@
 import re
 from copy import deepcopy

-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
+
+from routes._validators import validate_remote_host, validate_ssh_port


 # Backends the manual hardware simulator accepts. Must stay a subset of what
@@ -11,6 +13,14 @@ from fastapi import APIRouter
 _MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}


+def _validate_detection_target(host: str = "", ssh_port: str = "") -> tuple[str, str]:
+    """Validate the optional remote host / SSH port pair used for detection.
+
+    Both values are run through the shared validators from
+    ``routes._validators``; a value they return as None is normalized to "".
+    Returns ``(host, ssh_port)`` as strings.
+
+    Raises HTTPException(400) when an ssh_port is supplied without a host.
+    The validators themselves may also raise HTTPException(400) for
+    malformed input.
+    """
+    host_value = validate_remote_host(host) or ""
+    port_value = validate_ssh_port(ssh_port) or ""
+    # A port on its own is rejected: it is only accepted together with a host.
+    if port_value and not host_value:
+        raise HTTPException(400, "ssh_port requires host")
+    return host_value, port_value
+
+
 def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
    """Manual hardware is a "what if I had this setup" simulator —
    REPLACES the detected hardware entirely instead of adding to it.
@@ -105,6 +115,7 @@ def setup_hwfit_routes():
        """Detect and return current system hardware info. Pass host=user@server for remote.
        fresh=true bypasses the per-host cache (the Rescan button)."""
        from services.hwfit.hardware import detect_system
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        return detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)

    @router.get("/models")
@@ -118,6 +129,7 @@ def setup_hwfit_routes():
        from services.hwfit.hardware import detect_system
        from services.hwfit.fit import rank_models
        from services.hwfit.models import get_models, model_catalog_path
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
        if system.get("error"):
            return {"system": system, "models": [], "error": system["error"]}
@@ -165,8 +177,14 @@ def setup_hwfit_routes():
            system["gpu_name"] = g["name"]
            system["active_group"] = {**g, "use_count": n}

-        if gpu_count != "":
-            n = int(gpu_count)
+        # Parse the optional count defensively (matches the gpu_group guard
+        # above): a non-numeric query param previously raised ValueError ->
+        # HTTP 500. A malformed value is ignored, same as omitting it.
+        try:
+            n = int(gpu_count) if gpu_count != "" else None
+        except ValueError:
+            n = None
+        if n is not None:
            if n == 0:
                # RAM-only mode: rank against system memory, offload allowed.
                system["has_gpu"] = False
@@ -229,6 +247,7 @@ def setup_hwfit_routes():
        from services.hwfit.hardware import detect_system
        from services.hwfit.models import get_models
        from services.hwfit.profiles import compute_serve_profiles
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
        if system.get("error"):
            return {"system": system, "profiles": [], "error": system["error"]}
@@ -279,6 +298,7 @@ def setup_hwfit_routes():
        """Rank image generation models against detected hardware."""
        from services.hwfit.hardware import detect_system
        from services.hwfit.image_models import rank_image_models
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
        if system.get("error"):
            return {"system": system, "models": [], "error": system["error"]}
@@ -108,6 +108,12 @@ def _load_disabled_map():
        db.close()


+def _mcp_oauth_redirect_uri() -> str:
+    """Shared callback URL for legacy Google and generic MCP OAuth flows."""
+    # NOTE(review): imported at call time rather than at module level —
+    # presumably to avoid an import cycle with src.mcp_oauth; confirm.
+    from src.mcp_oauth import REDIRECT_URI
+    return REDIRECT_URI
+
+
 def setup_mcp_routes(mcp_manager: McpManager):
    """Setup MCP routes with the provided manager."""

@@ -445,9 +451,9 @@ def setup_mcp_routes(mcp_manager: McpManager):
            client_id = keys["client_id"]
            scopes = oauth_cfg.get("scopes", [])

-            # For Desktop App creds, redirect to localhost — the user will
+            # For Desktop App creds, default to localhost — the user will
            # paste the resulting URL back if they're on a different device.
-            redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
+            redirect_uri = _mcp_oauth_redirect_uri()

            params = {
                "client_id": client_id,
@@ -469,7 +475,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
                return RedirectResponse(auth_url)
            else:
                # Remote device — show paste-back page
-                return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host))
+                return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host, redirect_uri))
        finally:
            db.close()

@@ -536,7 +542,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
            client_id = keys["client_id"]
            client_secret = keys["client_secret"]

-            redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
+            redirect_uri = _mcp_oauth_redirect_uri()

            async with httpx.AsyncClient() as client:
                resp = await client.post(
@@ -603,13 +609,19 @@ def setup_mcp_routes(mcp_manager: McpManager):
    return router


-def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
+def _oauth_authorize_page(
+    auth_url: str,
+    server_id: str,
+    host: str,
+    redirect_uri: str = "http://localhost:7000/api/mcp/oauth/callback",
+) -> str:
    """Page with Google sign-in link and URL paste-back form for remote access."""
    # Escape values interpolated into the page: `host` comes from the request
    # Host header and `server_id` from the OAuth state — neither is trusted.
    auth_url = html.escape(auth_url, quote=True)
    server_id = html.escape(server_id, quote=True)
    host = html.escape(host, quote=True)
+    redirect_uri = html.escape(redirect_uri, quote=True)
    return f"""<!DOCTYPE html>
 <html><head>
 <meta charset="UTF-8"><title>Authorize — Odysseus</title>
@@ -654,7 +666,7 @@ def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
  <div class="divider"></div>
  <form method="POST" action="http://{host}/api/mcp/oauth/exchange/{server_id}">
    <p>Paste the URL from your browser after signing in:</p>
-    <input type="text" name="callback_url" placeholder="http://localhost:7000/api/mcp/oauth/callback?code=..." required>
+    <input type="text" name="callback_url" placeholder="{redirect_uri}?code=..." required>
    <br><button type="submit">Connect</button>
  </form>
 </div></body></html>"""
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
 from services.memory.memory_extractor import audit_memories
 from src.auth_helpers import get_current_user, require_user
 from src.endpoint_resolver import resolve_endpoint
+from src.task_endpoint import resolve_task_endpoint
 from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES

 logger = logging.getLogger(__name__)
@@ -105,6 +106,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        if memory_manager.find_duplicates(text, user_mem):
            return {"ok": True, "count": len(user_mem), "message": "Memory already exists"}

+        if memory_data.session_id:
+            try:
+                session_obj = session_manager.get_session(memory_data.session_id)
+            except KeyError:
+                raise HTTPException(404, "Session not found")
+            _assert_session_owner(session_obj, user)
+
        new_entry = memory_manager.add_entry(text, memory_data.source, memory_data.category, owner=user)
        if memory_data.session_id:
            new_entry["session_id"] = memory_data.session_id
@@ -163,8 +171,17 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM

            session_id = memory.get("session_id")
            if session_id and session_id in session_manager.sessions:
-                session = session_manager.get_session(session_id)
-                memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                try:
+                    session = session_manager.get_session(session_id)
+                    if session:
+                        _assert_session_owner(session, user)
+                    memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                except KeyError:
+                    memory["session_name"] = "Unknown"
+                except HTTPException as exc:
+                    if exc.status_code != 404:
+                        raise
+                    memory["session_name"] = "Unknown"
            else:
                memory["session_name"] = "Unknown"

@@ -224,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        }
        messages = [system_msg] + sess.get_context_messages()

+        t_url, t_model, t_headers = resolve_task_endpoint(
+            sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
+        )
+
        try:
            suggestion_text = await llm_call_async(
-                sess.endpoint_url,
-                sess.model,
+                t_url,
+                t_model,
                messages,
                temperature=0.2,
                max_tokens=500,
-                headers=sess.headers,
+                headers=t_headers,
            )
            try:
                suggestions = json.loads(suggestion_text)
@@ -262,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        endpoint_url = model = None
        headers = {}

-        # Try default model from settings first
-        settings = _load_settings()
-        ep_id = settings.get("default_endpoint_id", "")
-        default_model = settings.get("default_model", "")
-        if ep_id:
-            db = SessionLocal()
-            try:
-                ep = db.query(ModelEndpoint).filter(
-                    ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
-                ).first()
-                if ep:
-                    base = _normalize_base(ep.base_url)
-                    endpoint_url = build_chat_url(base)
-                    model = default_model
-                    if not model and ep.models:
-                        try:
-                            models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
-                            if models:
-                                model = models[0]
-                        except Exception:
-                            pass
-                    if ep.api_key:
-                        headers = {"Authorization": f"Bearer {ep.api_key}"}
-            finally:
-                db.close()
+        # Try utility model from settings first — memory audit is a background
+        # task and should prefer the lighter utility model over the main chat model.
+        from src.task_endpoint import resolve_task_endpoint
+        user = _owner(request)
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
+        if t_url and t_model:
+            endpoint_url, model, headers = t_url, t_model, t_headers
+        else:
+            # Fall back to default model if no task/utility model configured
+            settings = _load_settings()
+            ep_id = settings.get("default_endpoint_id", "")
+            default_model = settings.get("default_model", "")
+            if ep_id:
+                db = SessionLocal()
+                try:
+                    ep = db.query(ModelEndpoint).filter(
+                        ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
+                    ).first()
+                    if ep:
+                        base = _normalize_base(ep.base_url)
+                        endpoint_url = build_chat_url(base)
+                        model = default_model
+                        if not model and ep.models:
+                            try:
+                                models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
+                                if models:
+                                    model = models[0]
+                            except Exception:
+                                pass
+                        if ep.api_key:
+                            headers = {"Authorization": f"Bearer {ep.api_key}"}
+                finally:
+                    db.close()

-        # Fall back to session model if no default configured
-        if not endpoint_url and session:
-            try:
-                sess = session_manager.get_session(session)
-                _assert_session_owner(sess, _owner(request))
-                endpoint_url = sess.endpoint_url
-                model = sess.model
-                headers = sess.headers
-            except KeyError:
-                pass
+            # Fall back to session model if no default configured
+            if not endpoint_url and session:
+                try:
+                    sess = session_manager.get_session(session)
+                    _assert_session_owner(sess, _owner(request))
+                    endpoint_url = sess.endpoint_url
+                    model = sess.model
+                    headers = sess.headers
+                except KeyError:
+                    pass

        if not endpoint_url or not model:
            raise HTTPException(400, "No default model configured — set one in Settings")
@@ -344,13 +373,14 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
            try:
                sess = session_manager.get_session(session)
                _assert_session_owner(sess, _owner(request))
-                endpoint_url = sess.endpoint_url
-                model = sess.model
-                headers = sess.headers
+                endpoint_url, model, headers = resolve_task_endpoint(
+                    sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
+                )
            except KeyError:
-                 raise HTTPException(404, "Session not found — needed for LLM config")
+                logger.warning("Session %s not found, falling back to utility endpoint", session)
+                endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
        else:
-            endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
+            endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
    
        if not endpoint_url or not model:
            raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
@@ -123,6 +123,21 @@ def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
    return cleared_users


+def _default_endpoint_needs_assignment(current_default_id: str, enabled_endpoint_ids) -> bool:
+    """Whether the global default chat endpoint should be (re)assigned.
+
+    True when nothing is configured yet, or the configured default no longer
+    resolves to an enabled endpoint (e.g. the user disabled it). Without the
+    second case, adding a new endpoint after disabling the previous default
+    leaves `default_endpoint_id` pointing at the disabled endpoint, so features
+    that read the raw setting (Memory → Tidy) fail with "No default model
+    configured" even though an enabled endpoint exists. See #3586.
+
+    ``current_default_id`` is the configured default endpoint id (empty when
+    unset); ``enabled_endpoint_ids`` is any container supporting ``in`` that
+    holds the ids of the enabled endpoints.
+    """
+    # Nothing configured yet: always assign. The container is not consulted.
+    if not current_default_id:
+        return True
+    # Configured, but stale if it is no longer among the enabled endpoints.
+    return current_default_id not in enabled_endpoint_ids
+
+
 # Loopback hosts a user might type for a local model server (LM Studio,
 # llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
 # host the server actually runs on.
@@ -233,6 +248,9 @@ _PROVIDER_CURATED = {
    "zai-coding": [
        "glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
    ],
+    "kimi-code": [
+        "kimi-for-coding",
+    ],
    "deepseek": [
        "deepseek-chat", "deepseek-reasoner",
    ],
@@ -283,6 +301,7 @@ _HOST_TO_CURATED = (
    ("fireworks.ai", "fireworks"),
    ("googleapis.com", "google"),
    ("x.ai", "xai"),
+    ("nvidia.com", "nvidia"),
    ("openrouter.ai", "openrouter"),
    ("ollama.com", "ollama"),
 )
@@ -299,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
    parsed = urlparse(base_url)
    if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
        return "zai-coding"
+    if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
+        return "kimi-code"
    for domain, key in _HOST_TO_CURATED:
        if _host_match(base_url, domain):
            return key
@@ -477,10 +498,17 @@ _NON_CHAT_PREFIXES = (
    "dall-e", "tts-", "whisper", "text-embedding", "embedding",
    "davinci", "babbage", "moderation", "omni-moderation",
    "sora", "gpt-image", "chatgpt-image",
+    # embedding / retrieval / non-chat models (common across providers)
+    "snowflake/arctic-embed", "nvidia/nv-embed", "embed",
 )
 _NON_CHAT_CONTAINS = (
    "-realtime", "-transcribe", "-tts", "-codex",
-    "codex-",
+    "codex-", "content-safety", "-safety", "-reward", "nvclip",
+    "kosmos", "fuyu", "deplot", "vila", "neva",
+    "gliner", "riva", "-parse", "-embedqa", "-nemoretriever",
+    "topic-control", "calibration",
+    "ai-synthetic-video", "cosmos-reason2",
+    "bge", "llama-guard",
 )
 _NON_CHAT_EXACT_PREFIXES = (
    "gpt-audio",  # gpt-audio, gpt-audio-mini etc. (not gpt-4o-audio-preview which is chat)
@@ -680,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    """Probe a base URL's /models endpoint and return list of model IDs.
    For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
    from src.endpoint_resolver import resolve_url
+    from src.llm_core import httpx_get_kimi_aware
    base = resolve_url(_normalize_base(base_url))
    provider = _safe_detect_provider(base)
    if provider == "chatgpt-subscription":
@@ -715,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    url = _safe_build_models_url(base)
    headers = _safe_build_headers(api_key, base)
    try:
-        r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
+        r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
        r.raise_for_status()
        data = r.json()
        # OpenAI format: {"data": [{"id": "model-name"}]}
@@ -731,7 +760,12 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                for _e in _PROVIDER_CURATED.get(_ck, []):
                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
                        models.append(_e)
-            return models
+            if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
+                _ck = _match_provider_curated(base, None)
+                for _e in _PROVIDER_CURATED.get(_ck, []):
+                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
+                        models.append(_e)
+            return [m for m in models if _is_chat_model(m)]
    except httpx.HTTPStatusError as e:
        if api_key:
            status = e.response.status_code if e.response is not None else "unknown"
@@ -755,7 +789,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
            data = r.json()
            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
            if models:
-                return models
+                return [m for m in models if _is_chat_model(m)]
    except Exception as e:
        logger.debug(f"Ollama /api/tags probe failed for {base}: {e}")
    # Fall back to curated list if the provider has a URL-based match (e.g. z.ai has no /models endpoint)
@@ -847,15 +881,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->


 def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
-    """Return a provider-aware error message for failed endpoint probes."""
+    """Return a provider-aware error message for failed endpoint probes.
+
+    Surfaces the URL we actually probed and, when the endpoint looks like
+    LM Studio (port 1234 or hostname match), adds a hint about loading a
+    model and confirming the Developer Server is running. The user previously
+    saw a generic "No models found for that provider/key" with no way to
+    tell whether the URL was wrong, the server was down, or the server was
+    reachable but had no model loaded (issue #25).
+    """
    ping = ping or {}
    error = ping.get("error")
+    from src.endpoint_resolver import build_models_url
+    try:
+        probed = build_models_url(base_url) or base_url
+    except Exception:
+        probed = base_url
    parsed = urlparse(base_url)
    host = (parsed.hostname or "").lower()
    is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
+    is_lmstudio = (
+        parsed.port == 1234
+        or "lmstudio" in host
+        or "lm-studio" in host
+        or "lm_studio" in host
+    )
+
+    if is_lmstudio:
+        parts = [
+            "LM Studio is reachable, but no models were reported.",
+            f"Probed {probed}.",
+        ]
+        if error:
+            parts.append(f"Last probe error: {error}.")
+        parts.append(
+            "Open LM Studio, load at least one model, and confirm the "
+            "Developer Server is running on port 1234."
+        )
+        parts.append(
+            "Base URL should be http://localhost:1234/v1 (native) or "
+            "http://host.docker.internal:1234/v1 (Docker)."
+        )
+        return " ".join(parts)

    if is_ollama:
        parts = ["No Ollama models found for that endpoint."]
+        parts.append(f"Probed {probed}.")
        if error:
            parts.append(f"Last probe error: {error}.")
        parts.append("Check that Ollama is running and that the base URL is correct.")
@@ -865,9 +936,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
        return " ".join(parts)

    if error:
-        return f"No models found for that provider/key. Last probe error: {error}."
+        return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."

-    return "No models found for that provider/key."
+    return f"No models found for that provider/key. Probed {probed}."


 def _normalize_model_ids(value):
@@ -1719,12 +1790,19 @@ def setup_model_routes(model_discovery):
            )
            db.add(ep)
            db.commit()
-            # Auto-set as default chat endpoint if none configured yet. Seed
-            # the first CHAT model (not raw model_ids[0]) so we don't pin the
-            # global default to an embedding/tts/etc. entry a provider happens
-            # to list first.
+            # Auto-set as default chat endpoint when none is usable yet — either
+            # nothing is configured, or the configured default points at an
+            # endpoint that is now missing/disabled (#3586). Seed the first CHAT
+            # model (not raw model_ids[0]) so we don't pin the global default to
+            # an embedding/tts/etc. entry a provider happens to list first.
            settings = _load_settings()
-            if not settings.get("default_endpoint_id"):
+            enabled_ids = {
+                e.id
+                for e in db.query(ModelEndpoint).filter(
+                    ModelEndpoint.is_enabled == True  # noqa: E712
+                ).all()
+            }
+            if _default_endpoint_needs_assignment(settings.get("default_endpoint_id") or "", enabled_ids):
                from src.endpoint_resolver import _first_chat_model
                settings["default_endpoint_id"] = ep.id
                settings["default_model"] = _first_chat_model(model_ids) or ""
@@ -160,8 +160,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
            JSON response confirming removal
        """
        try:
-            if not directory:
-                raise HTTPException(400, "Directory path is required")
+            # Confine to PERSONAL_DIR — parity with add_directory_to_rag (which
+            # resolves the path the same way). Without this, an arbitrary or
+            # `..`-escaping path is passed straight to
+            # personal_docs_manager.remove_directory / rag.remove_directory.
+            directory = _resolve_allowed_personal_dir(directory)

            logger.info(f"Removing directory from RAG: {directory}")

@@ -11,7 +11,7 @@ from core.session_manager import SessionManager
 from core.models import ChatMessage
 from src.request_models import SessionResponse
 from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
-from src.auth_helpers import get_current_user, effective_user, _auth_disabled
+from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
 from src.session_actions import is_session_recently_active


@@ -258,7 +258,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            last_msg_map = {}
            mode_map = {}
            msg_count_map = {}
-            rows = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False, DbSession.owner == user).all()
+            q = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False)
+            q = owner_filter(q, DbSession, user)
+            rows = q.all()
            for row in rows:
                folder_map[row.id] = row.folder
                token_map[row.id] = (row.total_input_tokens or 0) + (row.total_output_tokens or 0)
@@ -277,17 +279,19 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            # Sessions with active documents that have content
            from sqlalchemy import func
            doc_session_ids = set(
-                r[0] for r in db.query(Document.session_id)
-                .filter(Document.is_active == True,
-                        Document.current_content != None,
-                        func.trim(Document.current_content) != "",
-                        Document.owner == user)
+                r[0] for r in owner_filter(
+                    db.query(Document.session_id)
+                    .filter(Document.is_active == True,
+                            Document.current_content != None,
+                            func.trim(Document.current_content) != ""),
+                    Document, user)
                .distinct().all()
            )
            img_session_ids = set(
-                r[0] for r in db.query(GalleryImage.session_id)
-                .filter(GalleryImage.session_id != None,
-                        GalleryImage.owner == user)
+                r[0] for r in owner_filter(
+                    db.query(GalleryImage.session_id)
+                    .filter(GalleryImage.session_id != None),
+                    GalleryImage, user)
                .distinct().all()
            )
        finally:
@@ -1,6 +1,7 @@
 """Shell routes — user-facing command execution endpoint."""

 import asyncio
+import importlib
 import json
 import logging
 import os
@@ -14,6 +15,7 @@ from collections import namedtuple
 from pathlib import Path
 from typing import Dict, Any
 from core.platform_compat import IS_APPLE_SILICON, which_tool
+from src.optional_deps import prepare_optional_dependency_import

 # POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
 # on Windows, so importing them unconditionally crashed app startup there
@@ -149,6 +151,11 @@ def _pip_dist_name(pkg: dict) -> str:
    return (pkg.get("name") or "").replace("_", "-")


+def _import_optional_dependency_for_status(name: str):
+    """Import optional dependency `name` for the package status probe.
+
+    Calls ``prepare_optional_dependency_import(name)`` first, then imports the
+    module with ``importlib.import_module`` and returns it. Any exception from
+    either call (e.g. ``ImportError``) propagates to the caller.
+    """
+    # Preparation must happen before the import attempt; what it sets up is
+    # defined in src.optional_deps (not visible here).
+    prepare_optional_dependency_import(name)
+    module = importlib.import_module(name)
+    return module
+
+
 def _package_installed_from_probe(name: str, probe: dict) -> bool:
    """Return whether an optional dependency is usable by Cookbook.

@@ -970,7 +977,6 @@ def setup_shell_routes() -> APIRouter:
        """
        _require_admin(request)
        _reject_cross_site(request)
-        import importlib
        import importlib.metadata as importlib_metadata
        import shlex
        import json as _json
@@ -1057,6 +1063,13 @@ def setup_shell_routes() -> APIRouter:
                "category": "Image",
                "target": "remote",
            },
+            {
+                "name": "transformers",
+                "pip": "transformers",
+                "desc": "Hugging Face model components used by SD/Flux pipelines and image tools",
+                "category": "Image",
+                "target": "remote",
+            },
            {
                "name": "rembg",
                "pip": "rembg[gpu]",
@@ -1202,7 +1215,7 @@ def setup_shell_routes() -> APIRouter:
                    pkg["status_note"] = _package_status_note("vllm", probe)
            else:
                try:
-                    importlib.import_module(pkg["name"])
+                    _import_optional_dependency_for_status(pkg["name"])
                    importlib_metadata.version(_pip_dist_name(pkg))
                    pkg["installed"] = True
                except ImportError:
@@ -1251,6 +1264,7 @@ def setup_shell_routes() -> APIRouter:
            "sglang[all]",
            "diffusers",
            "diffusers[torch]",
+            "transformers",
            "TTS",
            "bark",
            "faster-whisper",
@@ -198,6 +198,8 @@ def setup_webhook_routes(
        "opencode-go": "https://opencode.ai/zen/go/v1",
        "fireworks": "https://api.fireworks.ai/inference/v1",
        "venice": "https://api.venice.ai/api/v1",
+        "kimi-code": "https://api.kimi.com/coding/v1",
+        "kimicode": "https://api.kimi.com/coding/v1",
    }

    # Model prefix → provider mapping for auto-detection
@@ -210,6 +212,8 @@ def setup_webhook_routes(
        "mistral": "mistral",
        "llama": "groq",
        "mixtral": "groq",
+        "kimi-for-coding": "kimi-code",
+        "kimi": "kimi-code",
    }

    def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]:
@@ -0,0 +1,85 @@
+"""Workspace API - browse server directories to pick a tool workspace folder."""
+import os
+from fastapi import APIRouter, Request, HTTPException, Query
+
+from src.auth_helpers import get_current_user
+from src.tool_security import owner_is_admin_or_single_user
+
+# Cap entries returned per directory (mirrors filesystem_tools._CODENAV_MAX_HITS).
+# A huge directory shouldn't dump thousands of rows into the picker; the user can
+# type/paste a path to jump straight in instead.
+_MAX_BROWSE_DIRS = 500
+
+
+def setup_workspace_routes():
+    """Create the ``/api/workspace`` router used by the workspace picker UI.
+
+    Registers two GET endpoints, both gated by
+    ``owner_is_admin_or_single_user``:
+
+    * ``/browse`` - list the visible subdirectories of a server path.
+    * ``/vet``    - check whether a path may be bound as a workspace.
+
+    Returns:
+        The configured ``APIRouter``.
+    """
+    router = APIRouter(prefix="/api/workspace", tags=["workspace"])
+
+    def _require_admin(request: Request, detail: str) -> None:
+        """Raise HTTP 403 with `detail` unless the caller passes the admin check."""
+        owner = get_current_user(request)
+        if not owner_is_admin_or_single_user(owner):
+            raise HTTPException(status_code=403, detail=detail)
+
+    @router.get("/browse")
+    def browse(request: Request, path: str = Query(default="")):
+        """List subdirectories of `path` (default: home) so the UI can navigate
+        the server filesystem and pick a workspace folder. Directories only.
+
+        ADMIN-ONLY: this enumerates the server filesystem, so it is gated the
+        same way the file/shell tools are (read_file/write_file/bash are in
+        NON_ADMIN_BLOCKED_TOOLS). A non-admin who can't use those tools must not
+        be able to map the host's directory tree either.
+
+        Returns a dict with:
+            path:       canonical directory that was listed.
+            parent:     its parent directory, or None at a filesystem root.
+            dirs:       up to ``_MAX_BROWSE_DIRS`` ``{"name", "path"}`` entries,
+                        sorted case-insensitively by name.
+            truncated:  True when more directories existed than were returned.
+            selectable: True when ``vet_workspace`` accepts the directory.
+
+        Raises:
+            HTTPException: 403 when the caller fails the admin check.
+        """
+        _require_admin(request, "Workspace browsing is admin-only")
+
+        # Resolve symlinks so the reported path is canonical and the UI navigates
+        # real directories (defends against symlink games in displayed paths).
+        try:
+            target = os.path.realpath(os.path.expanduser(path.strip() or "~"))
+        except ValueError:
+            # realpath raises ValueError for a path with an embedded NUL byte
+            # (e.g. ?path=%00). Treat it like any other unusable path and fall
+            # back to home instead of surfacing an unhandled 500.
+            target = ""
+        if not target or not os.path.isdir(target):
+            target = os.path.realpath(os.path.expanduser("~"))
+
+        dirs = []
+        try:
+            with os.scandir(target) as it:
+                for entry in it:
+                    try:
+                        # Don't follow symlinks when classifying - a symlinked
+                        # dir is skipped rather than letting the browser wander
+                        # off via a link. Hidden entries are omitted.
+                        if entry.is_dir(follow_symlinks=False) and not entry.name.startswith("."):
+                            # Build the child path server-side with os.path.join
+                            # so it's correct on Windows (backslashes) and Linux.
+                            dirs.append({"name": entry.name, "path": os.path.join(target, entry.name)})
+                    except OSError:
+                        # An entry that cannot be stat'ed is simply left out.
+                        continue
+        except OSError:
+            # Unreadable directory (PermissionError is an OSError subclass):
+            # report it as empty rather than failing the request.
+            dirs = []
+
+        dirs_sorted = sorted(dirs, key=lambda d: d["name"].lower())
+        truncated = len(dirs_sorted) > _MAX_BROWSE_DIRS
+        parent = os.path.dirname(target)
+        # Imported at call time, as in the original; presumably to avoid an
+        # import cycle with src.tool_execution - TODO confirm.
+        from src.tool_execution import vet_workspace
+        return {
+            "path": target,
+            # At a filesystem root dirname() returns the path itself.
+            "parent": parent if parent and parent != target else None,
+            "dirs": dirs_sorted[:_MAX_BROWSE_DIRS],
+            "truncated": truncated,
+            # Whether this directory may be bound as a workspace (filesystem
+            # roots and sensitive dirs may be browsed through but not chosen).
+            "selectable": vet_workspace(target) is not None,
+        }
+
+    @router.get("/vet")
+    def vet(request: Request, path: str = Query(default="")):
+        """Validate a workspace path without binding it.
+
+        The UI calls this before persisting a manually typed path (/workspace
+        set) so a typo, file path, deleted folder, sensitive dir, or filesystem
+        root is rejected up front with the canonical path returned on success,
+        instead of being stored client-side and silently dropped at chat time.
+        Admin-gated like /browse: it confirms path existence on the host.
+
+        Returns:
+            ``{"ok": bool, "path": <vet_workspace result or None>}``.
+
+        Raises:
+            HTTPException: 403 when the caller fails the admin check.
+        """
+        _require_admin(request, "Workspace selection is admin-only")
+        from src.tool_execution import vet_workspace
+        resolved = vet_workspace(path)
+        return {"ok": resolved is not None, "path": resolved}
+
+    return router