fix(agent): preserve loop guard stream behavior

fix(agent): surface early loop-guard stops
2026-06-23 05:05:24 -04:00 · 2026-06-15 17:17:16 +01:00 · 2026-06-15 17:07:15 +01:00
225 changed files with 2964 additions and 15557 deletions
@@ -15,10 +15,6 @@ build/
 # at runtime — never baked into the image. Mirrored in .gitignore.
 secrets.env
 secrets.env.*
 secrets.env~
 .secrets.env.swp
 .secrets.env.swo
 **/#secrets.env#
 !secrets.env.example
 /data/
 /logs/
@@ -19,7 +19,7 @@ jobs:
    name: Python syntax (compileall)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
@@ -32,7 +32,7 @@ jobs:
    name: JS syntax (node --check)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6.4.0
@@ -54,7 +54,7 @@ jobs:
    # ROADMAP "fresh install smoke tests" item; make this required once green.
    continue-on-error: true
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          fetch-depth: 0
          persist-credentials: false
@@ -37,7 +37,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -52,7 +52,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -93,7 +93,7 @@ jobs:
      security-events: write  # upload SARIF to the Security tab
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -36,7 +36,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -55,7 +55,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -45,7 +45,7 @@ jobs:
            arch: arm64
            runner: ubuntu-24.04-arm
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
      - name: Set up Buildx
@@ -86,7 +86,7 @@ jobs:
      contents: read
      packages: write
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
      - name: Read APP_VERSION + short sha
@@ -14,7 +14,7 @@ jobs:
    # Skip bots (Dependabot, release-drafter, etc.)
    if: ${{ github.event.issue.user.type != 'Bot' }}
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          sparse-checkout: .github/scripts
          persist-credentials: false
@@ -23,7 +23,7 @@ jobs:
    # Skip bots: they open PRs programmatically and have their own process.
    if: github.event.pull_request.user.type != 'Bot'
    steps:
-      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          ref: ${{ github.base_ref }}
          sparse-checkout: .github/scripts
@@ -35,7 +35,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          # Full history so a secret committed in an earlier commit (and later
          # deleted) is still caught -- deletion does not remove it from Git.
@@ -36,7 +36,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -61,7 +61,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          persist-credentials: false
@@ -37,7 +37,7 @@ Manual development uses Python 3.11+:
 python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
-python -m uvicorn app:app --host 127.0.0.1 --port 7000
+python -m uvicorn app:app --host 0.0.0.0 --port 7000
 ```
 Windows is not actively tested. Docker on Linux or a Linux/macOS manual install is the safer path for now.
@@ -20,23 +20,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    gosu \
    && rm -rf /var/lib/apt/lists/*
 # Docker CLI (client only — daemon stays on the host via the
 # /var/run/docker.sock mount). The Debian `docker.io` package ships
 # dockerd but not the client binary on slim, so grab the static client
 # tarball from download.docker.com instead.
 ARG DOCKER_CLI_VERSION=27.5.1
 RUN ARCH="$(dpkg --print-architecture)" \
    && case "$ARCH" in \
         amd64) DARCH=x86_64 ;; \
         arm64) DARCH=aarch64 ;; \
         *) echo "unsupported arch $ARCH"; exit 1 ;; \
       esac \
    && curl -fsSL "https://download.docker.com/linux/static/stable/${DARCH}/docker-${DOCKER_CLI_VERSION}.tgz" \
       -o /tmp/docker.tgz \
    && tar -xzf /tmp/docker.tgz -C /tmp \
    && install -m 0755 /tmp/docker/docker /usr/local/bin/docker \
    && rm -rf /tmp/docker /tmp/docker.tgz
 WORKDIR /app
 # Install Python deps first (layer cache). Optional extras (PyMuPDF AGPL, etc.)
@@ -1,45 +0,0 @@
 # -*- mode: python ; coding: utf-8 -*-
 a = Analysis(
    ['launcher.py'],
    pathex=[],
    binaries=[],
    datas=[('static', 'static'), ('scripts', 'scripts'), ('mcp_servers', 'mcp_servers'), ('services/hwfit/data', 'services/hwfit/data'), ('config', 'config'), ('.env.example', '.env.example')],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
 )
 pyz = PYZ(a.pure)
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='Odysseus',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    icon=['static\\icon.ico'],
 )
 coll = COLLECT(
    exe,
    a.binaries,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='Odysseus',
 )
@@ -1,5 +1,5 @@
 <p align="center">
-  <img src="docs/odysseus-wordmark.png" alt="Odysseus" width="238">
+  <img src="docs/odysseus-wordmark.png" alt="Odysseus" width="280">
 </p>
 <p align="center">
@@ -18,7 +18,7 @@
 </p>
 <p align="center">
-  <img src="docs/odysseus-browser.jpg" alt="Odysseus interface">
+  <img src="docs/odysseus.jpg" alt="Odysseus interface">
 </p>
 ---
@@ -1,7 +1,6 @@
 # app.py — slim orchestrator
 import mimetypes
 import os
 import sys
 def register_static_mime_types() -> None:
@@ -39,7 +38,7 @@ load_dotenv(encoding="utf-8-sig")
 import asyncio
 import logging
 import secrets
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Dict
 from contextlib import asynccontextmanager
@@ -114,13 +113,12 @@ app = FastAPI(
 )
 # ========= CORS =========
 CORS_ALLOW_METHODS = ["GET", "POST", "PUT", "PATCH", "DELETE"]
 allowed_origins = os.getenv("ALLOWED_ORIGINS", "http://localhost,http://127.0.0.1").split(",")
 app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
-    allow_methods=CORS_ALLOW_METHODS,
+    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=[
        "Accept",
        "Authorization",
@@ -318,7 +316,7 @@ if AUTH_ENABLED:
            # (no admin cookie available in that context). Restricted to
            # loopback clients + matching token to keep it locked down.
            try:
-                from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT, INTERNAL_TOOL_USER
+                from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT
                _hdr = request.headers.get(INTERNAL_TOOL_HEADER)
                if _hdr and secrets.compare_digest(_hdr, _ITT) and _is_trusted_loopback(request):
                    # Impersonation: when the agent's loopback call sets
@@ -330,11 +328,11 @@ if AUTH_ENABLED:
                    if _impersonate and _impersonate in getattr(_auth_mgr, "users", {}):
                        request.state.current_user = _impersonate
                    else:
-                        request.state.current_user = INTERNAL_TOOL_USER
+                        request.state.current_user = "internal-tool"
                    request.state.api_token = False
                    return await call_next(request)
-            except Exception as _e:
+            except Exception:
-                logger.warning("Internal tool auth header check failed", exc_info=_e)
+                pass
            # Allow DIRECT localhost requests (internal service calls from
            # heartbeats etc.). Tunnel/proxy-forwarded requests are excluded by
            # _is_trusted_loopback so LOCALHOST_BYPASS can't be abused over a
@@ -387,10 +385,11 @@ if AUTH_ENABLED:
                                    _db.close()
                            try:
                                await _asyncio.to_thread(_do)
-                            except Exception as _e:
+                            except Exception:
-                                logger.debug("Failed to update token last_used_at", exc_info=_e)
+                                pass
                        _asyncio.create_task(_touch_last_used(matched_id))
                        # Keep bearer-token callers out of normal cookie/user
                        # routes. API-aware routes can read api_token_owner.
                        request.state.current_user = "api"
                        request.state.api_token = True
                        request.state.api_token_id = matched_id
@@ -439,7 +438,7 @@ class _RevalidatingStatic(StaticFiles):
        return resp
-app.mount("/static", _RevalidatingStatic(directory=STATIC_DIR), name="static")
+app.mount("/static", _RevalidatingStatic(directory="static"), name="static")
 # ========= GENERATED IMAGES =========
@app.get("/api/generated-image/{filename}")
@@ -465,8 +464,8 @@ async def serve_generated_image(filename: str, request: Request):
                _db.close()
    except HTTPException:
        raise
-    except Exception as _e:
+    except Exception:
-        logger.warning("Image ownership verification failed for %r", filename, exc_info=_e)
+        pass
    ext = filename.rsplit('.', 1)[-1].lower()
    mime = {
        "png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg",
@@ -529,7 +528,6 @@ memory_vector     = components.get("memory_vector")
 upload_handler    = components["upload_handler"]
 app.state.upload_handler = upload_handler
 personal_docs_mgr = components["personal_docs_manager"]
 app.state.personal_docs_manager = personal_docs_mgr
 api_key_manager   = components["api_key_manager"]
 preset_manager    = components["preset_manager"]
 chat_processor    = components["chat_processor"]
@@ -863,7 +861,7 @@ async def get_version():
@app.get("/api/health")
 async def health_check() -> Dict[str, str]:
-    return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}
+    return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
@app.get("/api/ready")
 async def readiness_check() -> JSONResponse:
@@ -1173,12 +1171,3 @@ async def _shutdown_event():
    except Exception as e:
        logger.warning(f"MCP shutdown error: {e}")
    logger.info("Application shutdown complete")
 if __name__ == "__main__":
    import uvicorn
    bind_host = os.getenv("APP_BIND", "127.0.0.1")
    bind_port = int(os.getenv("APP_PORT", "7000"))
    uvicorn.run(app, host=bind_host, port=bind_port, log_level="info")
@@ -1,72 +0,0 @@
 #Requires -Version 5.1
 <#
  Build a portable Windows distribution for Odysseus.
  Output layout:
    dist\Odysseus\Odysseus.exe
    dist\Odysseus\static\...
    dist\Odysseus\scripts\...
    dist\Odysseus\mcp_servers\...
    dist\Odysseus\services\hwfit\data\...
  The app then keeps using its normal filesystem layout when frozen.
  Usage:
    powershell -ExecutionPolicy Bypass -File .\build-windows-portable.ps1
 #>
 $ErrorActionPreference = "Stop"
 Set-Location -Path $PSScriptRoot
 # Print a blank line followed by a cyan "==> <msg>" banner for a build stage.
 function Write-Step($msg) { Write-Host ""; Write-Host ("==> " + $msg) -ForegroundColor Cyan }
 # Print a blank line and a red "ERROR: <msg>" line, then end the script
 # with exit code 1.
 function Fail($msg) {
    Write-Host ""
    Write-Host ("ERROR: " + $msg) -ForegroundColor Red
    exit 1
 }
 # Locate the Python interpreter used for the rest of the build.
 Write-Step "Checking for Python"
 $pyExe = $null
 # Prefer the project-local virtual environment when one exists.
 if (Test-Path ".\.venv\Scripts\python.exe") {
    $pyExe = (Resolve-Path ".\.venv\Scripts\python.exe").Path
 } else {
    # Otherwise take the first of `py`, then `python`, that resolves on PATH.
    foreach ($c in @("py", "python")) {
        $cmd = Get-Command $c -ErrorAction SilentlyContinue
        if ($cmd) { $pyExe = $cmd.Source; break }
    }
    # A path under WindowsApps is presumably the Microsoft Store alias rather
    # than a real interpreter (TODO confirm); try to swap it for `py`.
    # NOTE(review): the loop above already tries `py` first, so reaching this
    # branch implies `py` was not found and this second lookup looks like it
    # cannot succeed; the WindowsApps path would then be kept. Confirm intent.
    if ($pyExe -like "*WindowsApps*python.exe") {
        $pyCmd = Get-Command py -ErrorAction SilentlyContinue
        if ($pyCmd) {
            $pyExe = $pyCmd.Source
        }
    }
 }
 # Stop the build when no interpreter was found at all.
 if (-not $pyExe) {
    Fail "Python not found on PATH. Install Python 3.11+ first."
 }
 Write-Host ("Using Python: " + $pyExe)
 Write-Step "Installing build dependencies"
 & $pyExe -m pip install --upgrade pip --quiet
 & $pyExe -m pip install -r requirements.txt pyinstaller pystray Pillow
 if ($LASTEXITCODE -ne 0) { Fail "Dependency install failed." }
 Write-Step "Building portable exe bundle"
 Remove-Item -Recurse -Force build, dist -ErrorAction SilentlyContinue
 $dataArgs = @(
    "--add-data", "static;static",
    "--add-data", "scripts;scripts",
    "--add-data", "mcp_servers;mcp_servers",
    "--add-data", "services/hwfit/data;services/hwfit/data",
    "--add-data", "config;config",
    "--add-data", ".env.example;.env.example"
 )
 & $pyExe -m PyInstaller --noconfirm --clean --onedir --noconsole --icon=static/icon.ico --name Odysseus @dataArgs launcher.py
 if ($LASTEXITCODE -ne 0) { Fail "PyInstaller build failed." }
 Write-Host ""
 Write-Host "Build complete." -ForegroundColor Green
 Write-Host "Portable app folder: $PSScriptRoot\dist\Odysseus" -ForegroundColor Green
 Write-Host "Distribute the whole folder (or zip it) so static assets and scripts stay with the exe." -ForegroundColor Green
@@ -5,9 +5,8 @@ offers and pair to it, without duplicating any LLM logic.
 Auth is enforced globally by AuthMiddleware (app.py), so reaching a handler here
 means the caller is authenticated by either a cookie session or a Bearer `ody_`
-API token. Ping/info accept either credential type, models requires a chat-
+API token. The read endpoints (ping/info/models) accept either; the pairing
-scoped API token for bearer callers, and the pairing endpoints are admin-cookie
+endpoints are admin-cookie only.
 only.
 Pairing CSRF posture: minting happens ONLY on POST. The session cookie is
 SameSite=Lax (routes/auth_routes.py), which a browser does not send on a
@@ -19,7 +18,7 @@ on a GET would be unsafe (Lax cookies ride top-level GET navigations), so GET
 import html
-from fastapi import APIRouter, HTTPException, Request
+from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
 from core.middleware import require_admin
@@ -53,18 +52,6 @@ def owner_can_see(row_owner, owner) -> bool:
    return row_owner is None or row_owner == owner
 def require_models_scope(request: Request) -> None:
    """Reject bearer-token callers whose token lacks the companion chat scope.

    Requests whose ``request.state.api_token`` is missing or falsy pass
    through untouched. For bearer callers, ``request.state.api_token_scopes``
    may be a comma-separated string or an iterable of scope values; entries
    that are blank after stripping are ignored.

    Raises:
        HTTPException: 403 when ``_pairing.COMPANION_SCOPE`` is not granted.
    """
    state = request.state
    if getattr(state, "api_token", False):
        granted = getattr(state, "api_token_scopes", None) or []
        if isinstance(granted, str):
            granted = [piece.strip() for piece in granted.split(",")]
        cleaned = (str(item).strip() for item in granted)
        if _pairing.COMPANION_SCOPE not in {name for name in cleaned if name}:
            raise HTTPException(403, "API token requires chat scope")
 def mint_pairing_token(owner: str, invalidate=None) -> tuple[str, str]:
    """Mint a pairing token AND invalidate the auth middleware's in-memory token
    cache, so the new token is accepted on the very next request without a server
@@ -116,7 +103,6 @@ def setup_companion_routes() -> APIRouter:
        rows -- the same rule as owner_filter. Read-only; never returns api_key
        material.
        """
        require_models_scope(request)
        import json as _json
        from core.database import SessionLocal, ModelEndpoint
@@ -20,7 +20,6 @@ logger = logging.getLogger(__name__)
 from core.atomic_io import atomic_write_json as _atomic_write_json  # noqa: E402
 from core.middleware import INTERNAL_TOOL_USER  # noqa: E402
 DEFAULT_PRIVILEGES = {
    "can_use_agent": True,
@@ -48,7 +47,7 @@ ADMIN_PRIVILEGES["allowed_models_restricted"] = False
 # backwards for this sentinel.
 ADMIN_PRIVILEGES["block_all_models"] = False
-from src.constants import AUTH_FILE, PASSWORD_MIN_LENGTH
+from src.constants import AUTH_FILE
 DEFAULT_AUTH_PATH = AUTH_FILE
 TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days
@@ -66,7 +65,7 @@ TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days
 # of those names would be denied an assistant and inconsistently owner-scoped.
 # Refuse to create or rename into any of them so the sentinels can't be
 # impersonated. (Keep this in sync with that synthetic-owner set.)
-RESERVED_USERNAMES = frozenset({INTERNAL_TOOL_USER, "api", "demo", "system"})
+RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"})
 def normalize_known_username(users: Dict[str, Any], username: str | None) -> Optional[str]:
@@ -244,15 +243,6 @@ class AuthManager:
    def is_configured(self) -> bool:
        return len(self.users) > 0
    def policy(self) -> dict:
        """Report the public auth policy values consumed by the frontend.

        The result carries the minimum password length, the reserved
        usernames in sorted order, the signup flag of this manager, and the
        session lifetime in whole days (``TOKEN_TTL`` is expressed in seconds).
        """
        return dict(
            password_min_length=PASSWORD_MIN_LENGTH,
            reserved_usernames=sorted(RESERVED_USERNAMES),
            signup_enabled=self.signup_enabled,
            session_days=TOKEN_TTL // 86400,
        )
    # ------------------------------------------------------------------
    # Account management
    # ------------------------------------------------------------------
@@ -583,15 +573,11 @@ class AuthManager:
            return None
        return self.create_session_trusted(username)
-    def create_session_trusted(self, username: str) -> Optional[str]:
+    def create_session_trusted(self, username: str) -> str:
        """Issue a session token for an already-verified user.
        Call only after verify_password (and TOTP if enabled) have passed."""
        username = username.strip().lower()
        token = secrets.token_hex(32)
        with self._config_lock:
            if username not in self.users:
                logger.warning("Refused to issue session for missing user '%s'", username)
                return None
        with self._sessions_lock:
            self._sessions[token] = {
                "username": username,
@@ -2,15 +2,12 @@ import os
 import logging
 import sqlite3
 from datetime import datetime, timezone
 from pathlib import Path
 from sqlalchemy import event, create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text
 from sqlalchemy.engine import Engine
 from sqlalchemy.types import TypeDecorator
 from sqlalchemy.ext.declarative import declarative_base, declared_attr
 from sqlalchemy.orm import relationship, sessionmaker, backref
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
 # Create base class for declarative models
@@ -32,26 +29,9 @@ class TimestampMixin:
    def updated_at(cls):
        return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False)
 # Ensure the writable data directory exists before SQLite connects.
 from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE
 Path(DATA_DIR).mkdir(parents=True, exist_ok=True)
 def _default_database_url() -> str:
    """Build the fallback SQLite URL for the ``app.db`` file inside DATA_DIR."""
    db_file = Path(DATA_DIR) / "app.db"
    return "sqlite:///" + str(db_file)
 def _normalize_sqlite_url(url: str) -> str:
    if not url.startswith("sqlite:///"):
        return url
    db_path = url.replace("sqlite:///", "", 1)
    if db_path == ":memory:" or os.path.isabs(db_path):
        return url
    return f"sqlite:///{(Path(get_app_root()) / db_path).resolve().as_posix()}"
 # Get database URL from environment, default to SQLite in DATA_DIR
-DATABASE_URL = _normalize_sqlite_url(os.getenv("DATABASE_URL", _default_database_url()))
+from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE
 DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db")
 # Create engine
 engine = create_engine(
@@ -15,8 +15,6 @@ from starlette.responses import Response
 # same value from this module. Never persisted or exposed externally.
 INTERNAL_TOOL_TOKEN = os.environ.get("ODYSSEUS_INTERNAL_TOKEN") or secrets.token_hex(32)
 INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token"
 # Pseudo-username on in-process tool-loopback requests; require_admin trusts it and it is reserved.
 INTERNAL_TOOL_USER = "internal-tool"
 def is_cors_preflight(method: str, headers) -> bool:
@@ -41,7 +39,7 @@ def require_admin(request: Request):
        hdr = request.headers.get(INTERNAL_TOOL_HEADER)
        if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN):
            return
-        if getattr(request.state, "current_user", None) == INTERNAL_TOOL_USER:
+        if getattr(request.state, "current_user", None) == "internal-tool":
            return
    except Exception:
        pass
@@ -67,9 +65,10 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
        response = await call_next(request)
        path = request.url.path
-        # Tool render endpoints
+        # Tool render endpoints are served inside iframes — allow framing by self
        is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
-        # Document library PDF preview endpoint
+        # PDF previews are embedded by the in-app document library. Keep the
        # exception route-scoped so normal app pages remain unframeable.
        is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
        # Visual report pages are self-contained HTML — need inline scripts + external images
        is_report = path.startswith("/api/research/report/")
@@ -96,7 +95,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
                "frame-ancestors 'none'"
            )
        elif is_tool_render:
-            # Skip framing headers for tools.
+            # Tool iframe content: skip all framing headers — the iframe's
            # sandbox="allow-scripts" attribute provides isolation.
            # Don't overwrite the route's own restrictive CSP either.
            pass
        elif is_document_pdf_preview:
            response.headers["X-Frame-Options"] = "SAMEORIGIN"
@@ -28,14 +28,6 @@ services:
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
      # Docker socket — lets Cookbook launch commands like
      # `docker exec ollama-rocm ollama show <tag>` reach the host's
      # Docker daemon (and sibling containers like ollama-rocm /
      # ollama-test). The in-container user needs to be in the
      # socket's owning group — see `group_add` below; the GID
      # there must match the host's `docker` group (defaults to 963
      # on Debian, 999 on Ubuntu — override via env if yours differs).
      - /var/run/docker.sock:/var/run/docker.sock
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -68,13 +60,6 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -101,7 +86,6 @@ services:
      - /dev/kfd
      - /dev/dri
    group_add:
      - "${DOCKER_GID:-963}"
      - video
      - ${RENDER_GID:-render}
@@ -27,16 +27,6 @@ services:
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
      # Docker socket — lets Cookbook launch commands like
      # `docker exec ollama-rocm ollama show <tag>` reach the host's
      # Docker daemon (and sibling containers like ollama-rocm /
      # ollama-test). The in-container user needs to be in the
      # socket's owning group — see `group_add` below; the GID
      # there must match the host's `docker` group (defaults to 963
      # on Debian, 999 on Ubuntu — override via env if yours differs).
      - /var/run/docker.sock:/var/run/docker.sock
    group_add:
      - "${DOCKER_GID:-963}"
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -69,13 +59,6 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -16,16 +16,6 @@ services:
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
      # Docker socket — lets Cookbook launch commands like
      # `docker exec ollama-rocm ollama show <tag>` reach the host's
      # Docker daemon (and sibling containers like ollama-rocm /
      # ollama-test). The in-container user needs to be in the
      # socket's owning group — see `group_add` below; the GID
      # there must match the host's `docker` group (defaults to 963
      # on Debian, 999 on Ubuntu — override via env if yours differs).
      - /var/run/docker.sock:/var/run/docker.sock
    group_add:
      - "${DOCKER_GID:-963}"
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -58,13 +48,6 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -13,8 +13,6 @@ set -e
 PUID="${PUID:-1000}"
 PGID="${PGID:-1000}"
 GOSU_BIN="$(command -v gosu)"
 PYTHON_BIN="$(command -v python)"
 # Reuse an existing matching group/user if the host's UID/GID already
 # corresponds to one in /etc/passwd (e.g. when the image is rebuilt
@@ -26,78 +24,26 @@ if ! getent passwd "$PUID" >/dev/null 2>&1; then
    useradd -u "$PUID" -g "$PGID" -M -s /bin/sh -d /app odysseus
 fi
-ODY_USER="$(getent passwd "$PUID" | cut -d: -f1)"
+# Repair ownership on every writable path the app touches at runtime.
-[ -z "$ODY_USER" ] && ODY_USER=odysseus
+#
-
+# Bind-mounted dirs (/app/data, /app/logs) are the obvious ones, but
-# Docker-socket group plumbing. When /var/run/docker.sock is bind-mounted
+# the app ALSO writes inside the image's own source tree at runtime:
-# (Cookbook uses docker exec to reach sibling containers), the socket is
+#   - services/cache/{search,content}/*  (search cache LRU)
-# owned by root:<host docker gid>. Add the app user to that group and later
+#   - services/search_analytics.json
-# call gosu by username so supplementary groups are retained.
+#   - services/search_engine_error.log
-DOCKER_SOCK="${DOCKER_SOCK:-/var/run/docker.sock}"
+#   - services/tts cache, etc.
-if [ -S "$DOCKER_SOCK" ]; then
+# These dirs were created as root during `docker build`, so dropping
-    SOCK_GID="$(stat -c '%g' "$DOCKER_SOCK" 2>/dev/null || echo '')"
+# to PUID:PGID would otherwise crash on the first import that tries
-    if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
+# to mkdir them. Chown the whole /app tree — fast (<1s on this size)
-        if ! getent group "$SOCK_GID" >/dev/null 2>&1; then
+# and idempotent via the `-not -uid` filter so we only touch files
-            groupadd -g "$SOCK_GID" docker_host || true
+# that need fixing.
-        fi
+for dir in /app /app/data /app/logs; do
        SOCK_GROUP="$(getent group "$SOCK_GID" | cut -d: -f1)"
        if [ -n "$SOCK_GROUP" ]; then
            usermod -aG "$SOCK_GROUP" "$ODY_USER" 2>/dev/null || true
        fi
    fi
 fi
 # mount_root_for <mount-point>
 # Scan /proc/self/mountinfo for the first entry whose 5th field (the mount
 # point) equals $1 and print that entry's 4th field (the root of the mount
 # within its filesystem). Prints nothing when no entry matches. awk errors
 # are silenced and the function always succeeds (`|| true`), so it is safe
 # to call under `set -e`.
 mount_root_for() {
    awk -v target="$1" '$5 == target { print $4; exit }' /proc/self/mountinfo 2>/dev/null || true
 }
 # is_broad_mount_root <path>
 # Succeeds (status 0) when $1 is exactly one of the top-level host
 # directories listed below; fails (status 1) for every other value,
 # including the empty string.
 is_broad_mount_root() {
    case "$1" in
        /|/home|/srv|/var|/usr|/opt|/tmp|/mnt|/media) return 0 ;;
        *) return 1 ;;
    esac
 }
 repair_tree_ownership() {
    dir="$1"
    if [ -d "$dir" ]; then
-        find "$dir" -xdev -not -uid "$PUID" -print0 2>/dev/null \
+        # `find ... -not -uid` keeps this O(touched-files), not
        # O(everything), so terabyte-sized maildirs don't slow startup.
        find "$dir" -not -uid "$PUID" -print0 2>/dev/null \
            | xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
    fi
 }
 # repair_app_tree_ownership
 # Chown every entry under /app that is not already owned by $PUID to
 # $PUID:$PGID, without descending into /app/data, /app/logs, /app/.ssh,
 # /app/.cache or /app/.local (those paths are pruned) and without crossing
 # onto other filesystems (-xdev). Best-effort: find/chown errors are
 # discarded and the pipeline never fails the script.
 repair_app_tree_ownership() {
    if [ -d /app ]; then
        # `-prune -o ...` skips the listed subtrees entirely; only the
        # right-hand side of -o prints paths for chown.
        find /app -xdev \
            \( -path /app/data -o -path /app/logs -o -path /app/.ssh -o -path /app/.cache -o -path /app/.local \) -prune \
            -o -not -uid "$PUID" -print0 2>/dev/null \
            | xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
    fi
 }
 # repair_bind_mount_ownership <dir>
 # Fix ownership of a directory that may be a bind mount.
 #   - Does nothing when $1 is not a directory.
 #   - When the mount backing $1 has a root that is_broad_mount_root accepts,
 #     only the directory itself is chowned and a notice is written to
 #     stderr; the tree below it is left alone.
 #   - Otherwise the whole tree is handed to repair_tree_ownership.
 # Note: `dir` and `mount_root` are plain (global) shell variables.
 repair_bind_mount_ownership() {
    dir="$1"
    if [ ! -d "$dir" ]; then
        return
    fi
    # Root of the mount whose mount point is exactly $dir (empty if none).
    mount_root="$(mount_root_for "$dir")"
    if is_broad_mount_root "$mount_root"; then
        echo "Skipping recursive ownership repair for $dir because it maps to broad host path $mount_root" >&2
        # Still make the mount point itself usable by the app user.
        chown "$PUID:$PGID" "$dir" 2>/dev/null || true
        return
    fi
    repair_tree_ownership "$dir"
 }
 # Repair image-owned writable paths without walking into bind-mounted host
 # trees, then repair the app-owned mount roots separately.
 repair_app_tree_ownership
 for dir in /app/data /app/logs /app/.ssh /app/.cache/huggingface /app/.local; do
    repair_bind_mount_ownership "$dir"
 done
 # Cookbook installs vllm/etc. via `pip install --user`, which pulls
@@ -124,7 +70,6 @@ for cu in \
        break
    fi
 done
 # Disable the FlashInfer JIT sampler unconditionally — it is sampler-only
 # and has no impact on the attention path, but requires nvcc + matching
 # CUDA headers at startup. Without this, vLLM crashes with "Could not find
@@ -138,9 +83,9 @@ export PATH="/app/.local/bin:$PATH"
 # Run first-time setup as the app user so data/ files get the right ownership.
 # setup.py is idempotent — skips auth.json / .env if they already exist.
 # || true so a setup failure never prevents the container from starting.
-"$GOSU_BIN" "$ODY_USER" "$PYTHON_BIN" /app/setup.py || true
+gosu "$PUID:$PGID" python /app/setup.py || true
 # Drop root and run the actual app. `gosu` is preferred over `su` /
 # `sudo` because it cleans up the process tree (no extra shell layer)
 # so signals (SIGTERM from `docker stop`) reach uvicorn directly.
-exec "$GOSU_BIN" "$ODY_USER" "$@"
+exec gosu "$PUID:$PGID" "$@"
@@ -105,14 +105,6 @@ if (-not $pyExe) {
    }
 }
 # If the resolved interpreter lives under a WindowsApps path (presumably the
 # Microsoft Store alias stub -- confirm), prefer the `py` launcher pinned to
 # 3.11 when it is available. If `py` is not found, $pyExe is left unchanged.
 if ($pyExe -like "*WindowsApps*python.exe") {
    $pyCmd = Get-Command py -ErrorAction SilentlyContinue
    if ($pyCmd) {
        $pyExe = $pyCmd.Source
        $pyArgs = @("-3.11")
    }
 }
 # No interpreter could be resolved at all: abort with install instructions.
 if (-not $pyExe) {
    Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script."
 }
@@ -1,142 +0,0 @@
 # launcher.py
 """Dedicated entrypoint for the standalone Windows portable launcher.
 Handles:
 - Immediate GUI splash screen creation using tkinter.
 - Suppressing console stream crashes in windowed GUI mode via NullWriter.
 - Spawning system tray icon via pystray and Pillow (lazy-loaded).
 - Auto-opening default browser pointing to the running backend.
 - Launching the FastAPI server (importing and running app.py).
 """
 import os
 import sys
 import threading
 import time
 import webbrowser
 # Define a dummy NullWriter to suppress standard stream crashes (isatty etc.) in GUI mode
 class NullWriter:
    """Write-only stream stand-in that discards everything.

    Installed in place of ``sys.stdout`` / ``sys.stderr`` when those are
    ``None`` so code that writes to, flushes, or probes the standard streams
    does not crash.
    """

    def write(self, text):
        """Discard ``text`` and report it as fully written.

        Returns:
            The length of ``text``, following the ``io.TextIOBase.write``
            convention of returning the number of characters written, or 0
            when ``text`` has no length (e.g. ``None``). Never raises.
        """
        try:
            return len(text)
        except TypeError:
            return 0

    def flush(self):
        """No-op: nothing is ever buffered."""
        pass

    def isatty(self):
        """Return ``False``: this stream is never an interactive terminal."""
        return False
 if sys.stdout is None:
    sys.stdout = NullWriter()
 if sys.stderr is None:
    sys.stderr = NullWriter()
 splash_root = None
 # If running from a frozen PyInstaller bundle, launch the splash screen IMMEDIATELY
 if getattr(sys, 'frozen', False):
    # tkinter is only imported when sys.frozen is set.
    import tkinter as tk
    def show_splash_instantly():
        """Create the splash window and run its Tk main loop.

        Stores the Tk root in the module-level ``splash_root`` so other code
        can schedule its destruction. Blocks in ``mainloop()`` until the
        window is destroyed. Any exception is swallowed, so a failure to
        create the splash never propagates.
        """
        global splash_root
        try:
            splash_root = tk.Tk()
            splash_root.title("Odysseus")
            # Borderless window: no title bar or window-manager decorations.
            splash_root.overrideredirect(True)
            splash_root.configure(bg="#1a1c23")
            # Accented borders
            splash_root.config(highlightbackground="#e06c75", highlightcolor="#e06c75", highlightthickness=1)
            # Fixed 360x160 window centred on the screen.
            w, h = 360, 160
            ws = splash_root.winfo_screenwidth()
            hs = splash_root.winfo_screenheight()
            x = (ws - w) // 2
            y = (hs - h) // 2
            splash_root.geometry(f"{w}x{h}+{x}+{y}")
            tk.Label(splash_root, text="⛵ Odysseus", font=("Segoe UI", 22, "bold"), bg="#1a1c23", fg="#e06c75").pack(pady=(22, 2))
            tk.Label(splash_root, text="Launching background services...", font=("Segoe UI", 10), bg="#1a1c23", fg="#d1d4e0").pack(pady=2)
            tk.Label(splash_root, text="Please wait, this will take a few seconds.", font=("Segoe UI", 8, "italic"), bg="#1a1c23", fg="#5c6370").pack(pady=(12, 0))
            # Keep the splash above other windows while it is shown.
            splash_root.attributes("-topmost", True)
            splash_root.mainloop()
        except Exception:
            # Best-effort: the splash is cosmetic, so failures are ignored.
            pass
    # Launch the GUI splash screen immediately on a background thread
    # (daemon=True so this thread never keeps the process alive on exit).
    threading.Thread(target=show_splash_instantly, daemon=True).start()
 def create_tray_image():
    """Build the 64x64 RGBA tray icon: a sailboat in the red accent (#e06c75).

    Pillow is imported inside the function, so it is only loaded when an
    icon is actually requested.

    Returns:
        A ``PIL.Image.Image`` with a transparent background and three filled
        polygons (main sail, secondary sail, hull).
    """
    from PIL import Image, ImageDraw

    solid_accent = (224, 108, 117, 255)
    faded_accent = (224, 108, 117, 150)
    # (vertices, fill colour) pairs, listed in paint order.
    boat_parts = (
        ([(32, 10), (32, 45), (12, 45)], solid_accent),
        ([(32, 18), (32, 45), (48, 45)], faded_accent),
        ([(8, 48), (56, 48), (44, 56), (20, 56)], solid_accent),
    )

    canvas = Image.new('RGBA', (64, 64), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    for vertices, colour in boat_parts:
        pen.polygon(vertices, fill=colour)
    return canvas
 def on_open_browser(icon, item, url):
    """Open ``url`` with ``webbrowser.open``; ``icon`` and ``item`` are unused."""
    webbrowser.open(url)
 def on_exit(icon, item):
    """Stop the tray icon, then terminate the whole process with status 0.

    ``item`` is unused.
    """
    icon.stop()
    # os._exit ends the process immediately, without running cleanup
    # handlers or waiting for other threads.
    os._exit(0)
 def setup_system_tray(url):
    """Create the "Odysseus" tray icon and run it (blocking) on this thread.

    The menu has two entries: "Open Odysseus" (the default action, opens
    ``url``) and "Exit". pystray is imported here rather than at module
    import time. Any failure -- including pystray or Pillow being
    unavailable -- is swallowed so the caller is never affected.
    """
    try:
        import pystray

        tray_picture = create_tray_image()

        def _open_from_tray(icon, item):
            # Bind the target URL into the (icon, item) callback shape.
            return on_open_browser(icon, item, url)

        entries = (
            pystray.MenuItem('Open Odysseus', _open_from_tray, default=True),
            pystray.MenuItem('Exit', on_exit),
        )
        pystray.Icon("Odysseus", tray_picture, "Odysseus", entries).run()
    except Exception:
        pass
 def open_browser(url):
    """Wait briefly, dismiss the splash window if one exists, then open ``url``.

    The fixed 3.5 second pause gives the server time to start before the
    browser is pointed at it. Closing the splash is best-effort: any error
    while scheduling its destruction is ignored.
    """
    time.sleep(3.5)

    try:
        # Read-only access to the module-level splash handle; destruction is
        # scheduled through Tk's own event queue via after().
        splash = splash_root
        if splash:
            splash.after(0, splash.destroy)
    except Exception:
        pass

    webbrowser.open(url)
 if __name__ == "__main__":
    import uvicorn
    # Import the FastAPI app from app.py
    from app import app
    bind_host = os.getenv("APP_BIND", "127.0.0.1")
    bind_port = int(os.getenv("APP_PORT", "7000"))
    # The address the server listens on is not always a valid address to
    # browse to: wildcard binds ("0.0.0.0", "::", "") must be reached via
    # loopback, and a bare IPv6 literal needs brackets inside a URL.
    if bind_host in ("0.0.0.0", ""):
        browse_host = "127.0.0.1"
    elif bind_host == "::":
        browse_host = "[::1]"
    elif ":" in bind_host and not bind_host.startswith("["):
        browse_host = f"[{bind_host}]"
    else:
        browse_host = bind_host
    url = f"http://{browse_host}:{bind_port}"
    if getattr(sys, 'frozen', False):
        # Start browser manager thread
        threading.Thread(target=open_browser, args=(url,), daemon=True).start()
        # Start system tray manager thread
        threading.Thread(target=setup_system_tray, args=(url,), daemon=True).start()
    # The server itself still binds to exactly what APP_BIND requested.
    uvicorn.run(app, host=bind_host, port=bind_port, log_level="info")
@@ -23,7 +23,6 @@ import os.path
 from pathlib import Path
 from datetime import datetime, timedelta
 import uuid
 from contextvars import ContextVar
 from mcp.server import Server
 from mcp.server.stdio import stdio_server
@@ -56,8 +55,6 @@ def _uid_fetch_rows(data) -> list:
 # flat keys when no DB row matches (legacy single-account behaviour).
 _ACCOUNT_CACHE: dict = {}  # key = normalized account selector -> config dict
 _MCP_OWNER_ARG = "_odysseus_owner"
 _CURRENT_OWNER: ContextVar[str | None] = ContextVar("email_mcp_owner", default=None)
 def _clean_header_value(value) -> str:
@@ -71,45 +68,6 @@ def _db_path() -> Path:
    return Path(APP_DB)
 def _current_owner() -> str:
    """Return the owner bound to the current context, stripped; "" if unset."""
    return str(_CURRENT_OWNER.get() or "").strip()
 def _account_visible_to_owner(row: dict, owner: str) -> bool:
    row_owner = str(row.get("owner") or "").strip()
    if row_owner == owner:
        return True
    if row_owner:
        return False
    # Legacy ownerless accounts are only visible to a scoped caller when the
    # mailbox itself matches the owner, mirroring the HTTP email route fallback.
    owner_l = owner.lower()
    return owner_l in {
        str(row.get("imap_user") or "").strip().lower(),
        str(row.get("from_address") or "").strip().lower(),
    }
 def _filter_accounts_for_owner(rows: list[dict]) -> list[dict]:
    """Restrict ``rows`` to what the current caller is allowed to see.

    With an owner set for the current call, only rows visible to that owner
    are returned. Without one, ``rows`` is returned unchanged unless the
    rows carry more than one distinct non-empty owner, in which case nothing
    is returned.
    """
    caller = _current_owner()
    if not caller:
        distinct_owners = set()
        for record in rows:
            tag = str(record.get("owner") or "").strip()
            if tag:
                distinct_owners.add(tag)
        return [] if len(distinct_owners) > 1 else rows
    return [record for record in rows if _account_visible_to_owner(record, caller)]
 def _mcp_owner_required(rows: list[dict] | None = None) -> bool:
    """Tell whether the current call must carry an owner but does not.

    Args:
        rows: Account rows to inspect; read from the database when ``None``.

    Returns:
        ``False`` when an owner is already set for this call. Otherwise
        ``True`` exactly when the rows contain more than one distinct
        non-empty ``owner`` value.
    """
    if _current_owner():
        return False
    if rows is None:
        rows = _read_accounts_from_db()
    named = {str(record.get("owner") or "").strip() for record in rows}
    named.discard("")
    return len(named) > 1
 def _load_email_writing_style() -> str:
    """Return the existing Settings > Email > Writing Style value."""
    try:
@@ -163,8 +121,9 @@ def _default_document_owner() -> str | None:
        return None
-def _read_accounts_from_db() -> list:
+def _list_accounts_raw() -> list:
-    """Return all enabled email account rows. Empty list if missing. Never raises."""
+    """Return list of dicts from the email_accounts table. Empty list if table
    missing or empty. Never raises."""
    path = _db_path()
    if not path.exists():
        return []
@@ -172,10 +131,9 @@ def _read_accounts_from_db() -> list:
        conn = sqlite3.connect(str(path))
        conn.row_factory = sqlite3.Row
        columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()}
        owner_select = "owner" if "owner" in columns else "NULL AS owner"
        smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security"
        rows = conn.execute(f"""
-            SELECT id, {owner_select}, name, is_default, enabled,
+            SELECT id, name, is_default, enabled,
                   imap_host, imap_port, imap_user, imap_password, imap_starttls,
                   smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address
            FROM email_accounts WHERE enabled = 1
@@ -189,15 +147,11 @@ def _read_accounts_from_db() -> list:
        return []
-def _list_accounts_raw() -> list:
+def _resolve_account(selector: str | None) -> dict | None:
    """Return owner-visible email account rows for the active MCP call."""
    return _filter_accounts_for_owner(_read_accounts_from_db())
 def _resolve_account_from_rows(rows: list[dict], selector: str | None) -> dict | None:
    """Given a selector (None = default, or a name/user/id string), return the
    matching row or None. Matching is case-insensitive substring on name +
    imap_user + from_address, plus exact id match."""
    rows = _list_accounts_raw()
    if not rows:
        return None
    if not selector:
@@ -232,10 +186,6 @@ def _resolve_account_from_rows(rows: list[dict], selector: str | None) -> dict |
    return None
 def _resolve_account(selector: str | None) -> dict | None:
    return _resolve_account_from_rows(_list_accounts_raw(), selector)
 def _load_config(account: str | None = None) -> dict:
    """Return the full config dict for the requested account (or default).
@@ -244,7 +194,7 @@ def _load_config(account: str | None = None) -> dict:
      2. env vars + settings.json flat keys (legacy)
      3. hardcoded fallbacks (localhost:31143 etc.)
    """
-    cache_key = (_current_owner(), (account or "").strip().lower() or "__default__")
+    cache_key = (account or "").strip().lower() or "__default__"
    if cache_key in _ACCOUNT_CACHE:
        return _ACCOUNT_CACHE[cache_key]
@@ -273,11 +223,8 @@ def _load_config(account: str | None = None) -> dict:
        "account_name": None,
    }
-    raw_rows = _read_accounts_from_db()
+    rows = _list_accounts_raw()
-    rows = _filter_accounts_for_owner(raw_rows)
+    row = _resolve_account(account)
    row = _resolve_account_from_rows(rows, account)
    if _current_owner() and raw_rows and not rows:
        raise ValueError("No email account is configured for the authenticated owner")
    if account and rows and not row:
        available = ", ".join(
            f"{r.get('name') or r.get('imap_user')} <{r.get('imap_user') or r.get('from_address') or '?'}>"
@@ -1006,7 +953,7 @@ def _stash_agent_draft(*, to, subject, body, in_reply_to=None, references=None,
            now,
            account or None,
            "agent_draft",
-            _current_owner(),
+            "",
        ))
        conn.commit()
        conn.close()
@@ -1192,7 +1139,7 @@ def _create_email_draft_document(
    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    doc_title = (title or subject or "Email draft").strip() or "Email draft"
-    doc_owner = _current_owner() or _default_document_owner()
+    doc_owner = _default_document_owner()
    db = SessionLocal()
    try:
@@ -1978,22 +1925,10 @@ async def list_tools() -> list[Tool]:
@server.call_tool()
 async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    arguments = dict(arguments) if isinstance(arguments, dict) else {}
    owner = str(arguments.pop(_MCP_OWNER_ARG, "") or "").strip()
    owner_token = _CURRENT_OWNER.set(owner or None)
    try:
        all_db_accounts = _read_accounts_from_db()
        if _mcp_owner_required(all_db_accounts):
            return [TextContent(
                type="text",
                text="Error: email MCP requires an authenticated owner when multiple email account owners are configured.",
            )]
        if name == "list_email_accounts":
-            rows = _filter_accounts_for_owner(all_db_accounts)
+            rows = _list_accounts_raw()
            if not rows:
                if all_db_accounts and owner:
                    return [TextContent(type="text", text="No email accounts configured for this owner.")]
                return [TextContent(type="text", text="No email accounts configured. Legacy single-account mode active.")]
            lines = [f"Found {len(rows)} email account(s):\n"]
            for r in rows:
@@ -2173,16 +2108,6 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                bcc=arguments.get("bcc"),
                account=acct,
            )
            if "error" in result:
                return [TextContent(type="text", text=f"Error: {result['error']}")]
            if result.get("pending"):
                return [TextContent(
                    type="text",
                    text=(
                        f"Draft staged for approval (pending id: {result.get('pending_id')}). "
                        "Nothing has been sent yet. Review and approve it in Odysseus before delivery."
                    ),
                )]
            acct_note = f" (from {result['account']})" if result.get("account") else ""
            return [TextContent(type="text", text=f"Sent email to {result['to']} with subject '{result['subject']}'{acct_note}.")]
@@ -2358,8 +2283,6 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    except Exception as e:
        return [TextContent(type="text", text=f"Error: {e}")]
    finally:
        _CURRENT_OWNER.reset(owner_token)
 # ── Main ──
@@ -6,7 +6,6 @@ Imports MemoryManager and MemoryVectorStore from the Odysseus codebase.
 """
 import asyncio
 import os
 import sys
 import time
 from pathlib import Path
@@ -24,55 +23,6 @@ _memory_manager = None
 _memory_vector = None
 _initialized = False
 _OWNER_ENV_KEYS = ("ODYSSEUS_MCP_MEMORY_OWNER", "ODYSSEUS_MEMORY_OWNER")
 _OWNER_SCOPE_ERROR = (
    "Error: Memory MCP owner is not configured for an owner-scoped memory store. "
    "Set ODYSSEUS_MCP_MEMORY_OWNER for this server or use the owner-aware native memory tool."
 )
 def _configured_owner() -> str | None:
    """Return the first non-blank owner found in the ``_OWNER_ENV_KEYS`` env vars.

    Keys are checked in tuple order and values are stripped; ``None`` is
    returned when every key is unset or blank.
    """
    candidates = (os.environ.get(key, "").strip() for key in _OWNER_ENV_KEYS)
    return next((value for value in candidates if value), None)
 def _entry_owner(entry: dict) -> str | None:
    owner = entry.get("owner")
    if owner is None:
        return None
    owner_text = str(owner).strip()
    return owner_text or None
 def _owner_scoped_store(entries: list[dict]) -> bool:
    """Return ``True`` when at least one dict entry carries a non-blank owner.

    Items that are not dicts are ignored.
    """
    for candidate in entries:
        if not isinstance(candidate, dict):
            continue
        if _entry_owner(candidate):
            return True
    return False
 def _scope_entries() -> tuple[str | None, list[dict], list[dict], str | None]:
    """Load all memories and work out which ones this server may touch.

    Returns:
        ``(owner, all_entries, visible_entries, error)`` where

        * ``owner`` is the configured owner or ``None``;
        * ``all_entries`` is everything ``_memory_manager.load_all()`` returned;
        * ``visible_entries`` are the dict entries whose owner equals the
          configured owner (ownerless entries when no owner is configured);
        * ``error`` is ``_OWNER_SCOPE_ERROR`` when no owner is configured but
          the store contains owned entries -- ``visible_entries`` is then
          empty -- and ``None`` otherwise.
    """
    all_entries = _memory_manager.load_all()
    owner = _configured_owner()

    if owner is None and _owner_scoped_store(all_entries):
        return None, all_entries, [], _OWNER_SCOPE_ERROR

    # _entry_owner yields either a non-empty str or None, so one equality
    # test covers both the configured-owner and the ownerless case.
    visible = [
        item for item in all_entries
        if isinstance(item, dict) and _entry_owner(item) == owner
    ]
    return owner, all_entries, visible, None
 def _text_result(text: str) -> list[TextContent]:
    """Wrap ``text`` in a single-element list holding one text ``TextContent``."""
    return [TextContent(type="text", text=text)]
 def _ensure_init():
    """Lazy-init memory managers on first use."""
@@ -125,26 +75,24 @@ async def list_tools() -> list[Tool]:
@server.call_tool()
 async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    if name != "manage_memory":
-        return _text_result(f"Unknown tool: {name}")
+        return [TextContent(type="text", text=f"Unknown tool: {name}")]
    _ensure_init()
    if not _memory_manager:
-        return _text_result("Error: Memory manager not available")
+        return [TextContent(type="text", text="Error: Memory manager not available")]
    action = arguments.get("action", "")
    if action == "list":
        category_filter = arguments.get("category", "")
-        _owner, _all_memories, memories, scope_error = _scope_entries()
+        memories = _memory_manager.load()
        if scope_error:
            return _text_result(scope_error)
        if category_filter:
            memories = [m for m in memories if m.get("category", "").lower() == category_filter.lower()]
        if not memories:
            msg = "No memories found"
            if category_filter:
                msg += f" in category '{category_filter}'"
-            return _text_result(msg + ".")
+            return [TextContent(type="text", text=msg + ".")]
        lines = [f"Found {len(memories)} memory entries:\n"]
        for m in memories:
@@ -154,17 +102,15 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
            if len(text) > 150:
                text = text[:150] + "..."
            lines.append(f"- [{cat}] `{mid}` — {text}")
-        return _text_result("\n".join(lines))
+        return [TextContent(type="text", text="\n".join(lines))]
    elif action == "add":
        text = arguments.get("text", "")
        category = arguments.get("category", "fact")
        if not text:
-            return _text_result("Error: Memory text cannot be empty")
+            return [TextContent(type="text", text="Error: Memory text cannot be empty")]
-        owner, memories, _visible, scope_error = _scope_entries()
+        entry = _memory_manager.add_entry(text, source="ai_agent", category=category)
-        if scope_error:
+        memories = _memory_manager.load_all()
            return _text_result(scope_error)
        entry = _memory_manager.add_entry(text, source="ai_agent", category=category, owner=owner)
        memories.append(entry)
        _memory_manager.save(memories)
        if _memory_vector and _memory_vector.healthy:
@@ -172,28 +118,25 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                _memory_vector.add(entry["id"], text)
            except Exception:
                pass
-        return _text_result(f"Memory added: [{category}] {text} (id: {entry['id'][:8]})")
+        return [TextContent(type="text", text=f"Memory added: [{category}] {text} (id: {entry['id'][:8]})")]
    elif action == "edit":
        memory_id = arguments.get("memory_id", "")
        new_text = arguments.get("text", "")
        if not memory_id or not new_text:
-            return _text_result("Error: edit needs memory_id and text")
+            return [TextContent(type="text", text="Error: edit needs memory_id and text")]
-        _owner, memories, visible, scope_error = _scope_entries()
+        memories = _memory_manager.load_all()
-        if scope_error:
+        found = False
            return _text_result(scope_error)
        full_id = None
        for m in visible:
            if m.get("id", "").startswith(memory_id):
                full_id = m["id"]
                break
        if not full_id:
            return _text_result(f"Error: Memory '{memory_id}' not found")
        for m in memories:
-            if m.get("id") == full_id:
+            if m.get("id", "").startswith(memory_id):
                m["text"] = new_text
                m["timestamp"] = int(time.time())
                found = True
                full_id = m["id"]
                break
        if not found:
            return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")]
        _memory_manager.save(memories)
        if _memory_vector and _memory_vector.healthy and full_id:
            try:
@@ -201,26 +144,24 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                _memory_vector.add(full_id, new_text)
            except Exception:
                pass
-        return _text_result(f"Memory updated: {new_text}")
+        return [TextContent(type="text", text=f"Memory updated: {new_text}")]
    elif action == "delete":
        memory_id = arguments.get("memory_id", "")
        if not memory_id:
-            return _text_result("Error: delete needs memory_id")
+            return [TextContent(type="text", text="Error: delete needs memory_id")]
-        _owner, memories, visible, scope_error = _scope_entries()
+        memories = _memory_manager.load_all()
        if scope_error:
            return _text_result(scope_error)
        full_id = None
        deleted_text = ""
        deleted_category = ""
-        for m in visible:
+        for m in memories:
            if m.get("id", "").startswith(memory_id):
                full_id = m["id"]
                deleted_text = m.get("text", "")
                deleted_category = m.get("category", "")
                break
        if not full_id:
-            return _text_result(f"Error: Memory '{memory_id}' not found")
+            return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")]
        memories = [m for m in memories if m.get("id") != full_id]
        _memory_manager.save(memories)
        if _memory_vector and _memory_vector.healthy and full_id:
@@ -230,32 +171,30 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                pass
        cat = f"[{deleted_category}] " if deleted_category else ""
        snippet = deleted_text if len(deleted_text) <= 120 else deleted_text[:117] + "..."
-        return _text_result(f"Memory deleted: {cat}{snippet} (id: {memory_id})")
+        return [TextContent(type="text", text=f"Memory deleted: {cat}{snippet} (id: {memory_id})")]
    elif action == "search":
        query = arguments.get("text", "")
        if not query:
-            return _text_result("Error: search needs text (query)")
+            return [TextContent(type="text", text="Error: search needs text (query)")]
-        _owner, _all_memories, memories, scope_error = _scope_entries()
+        memories = _memory_manager.load()
        if scope_error:
            return _text_result(scope_error)
        if hasattr(_memory_manager, 'get_relevant_memories'):
            results = _memory_manager.get_relevant_memories(query, memories, threshold=0.05, max_items=20)
        else:
            query_lower = query.lower()
            results = [m for m in memories if query_lower in m.get("text", "").lower()][:20]
        if not results:
-            return _text_result(f"No memories found matching '{query}'.")
+            return [TextContent(type="text", text=f"No memories found matching '{query}'.")]
        lines = [f"Found {len(results)} matching memories:\n"]
        for m in results:
            cat = m.get("category", "fact")
            mid = m.get("id", "?")[:8]
            text = m.get("text", "")
            lines.append(f"- [{cat}] `{mid}` — {text}")
-        return _text_result("\n".join(lines))
+        return [TextContent(type="text", text="\n".join(lines))]
    else:
-        return _text_result(f"Error: Unknown action '{action}'. Use: list, add, edit, delete, search")
+        return [TextContent(type="text", text=f"Error: Unknown action '{action}'. Use: list, add, edit, delete, search")]
 async def run():
@@ -4,19 +4,93 @@
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
        "@anthropic-ai/sdk": "^0.104.1"
      },
      "devDependencies": {
-        "@antithesishq/bombadil": "^0.6.1"
+        "@antithesishq/bombadil": "^0.5.0"
      }
    },
    "node_modules/@anthropic-ai/sdk": {
      "version": "0.104.1",
      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
      "integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
      "license": "MIT",
      "dependencies": {
        "json-schema-to-ts": "^3.1.1",
        "standardwebhooks": "^1.0.0"
      },
      "bin": {
        "anthropic-ai-sdk": "bin/cli"
      },
      "peerDependencies": {
        "zod": "^3.25.0 || ^4.0.0"
      },
      "peerDependenciesMeta": {
        "zod": {
          "optional": true
        }
      }
    },
    "node_modules/@antithesishq/bombadil": {
-      "version": "0.6.1",
+      "version": "0.5.0",
-      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.6.1.tgz",
+      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
-      "integrity": "sha512-d1iufG3MI7gSMSiSmMeNdcMW+qR0yQXL2zdkVynC3n3DYgFJYlYXKUQzygmqU12m4RWlR5iOdQU1hsx5UT6+IA==",
+      "integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
      "dev": true,
      "license": "MIT",
      "bin": {
        "bombadil": "bin/bombadil.js"
      }
    },
    "node_modules/@babel/runtime": {
      "version": "7.29.7",
      "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.7.tgz",
      "integrity": "sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw==",
      "license": "MIT",
      "engines": {
        "node": ">=6.9.0"
      }
    },
    "node_modules/@stablelib/base64": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/@stablelib/base64/-/base64-1.0.1.tgz",
      "integrity": "sha512-1bnPQqSxSuc3Ii6MhBysoWCg58j97aUjuCSZrGSmDxNqtytIi0k8utUenAwTZN4V5mXXYGsVUI9zeBqy+jBOSQ==",
      "license": "MIT"
    },
    "node_modules/fast-sha256": {
      "version": "1.3.0",
      "resolved": "https://registry.npmjs.org/fast-sha256/-/fast-sha256-1.3.0.tgz",
      "integrity": "sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ==",
      "license": "Unlicense"
    },
    "node_modules/json-schema-to-ts": {
      "version": "3.1.1",
      "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
      "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
      "license": "MIT",
      "dependencies": {
        "@babel/runtime": "^7.18.3",
        "ts-algebra": "^2.0.0"
      },
      "engines": {
        "node": ">=16"
      }
    },
    "node_modules/standardwebhooks": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/standardwebhooks/-/standardwebhooks-1.0.0.tgz",
      "integrity": "sha512-BbHGOQK9olHPMvQNHWul6MYlrRTAOKn03rOe4A8O3CLWhNf4YHBqq2HJKKC+sfqpxiBY52pNeesD6jIiLDz8jg==",
      "license": "MIT",
      "dependencies": {
        "@stablelib/base64": "^1.0.0",
        "fast-sha256": "^1.3.0"
      }
    },
    "node_modules/ts-algebra": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
      "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
      "license": "MIT"
    }
  }
 }
@@ -4,6 +4,9 @@
    "url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
  },
  "devDependencies": {
-    "@antithesishq/bombadil": "^0.6.1"
+    "@antithesishq/bombadil": "^0.5.0"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.104.1"
  }
 }
@@ -160,8 +160,6 @@ def setup_api_token_routes() -> APIRouter:
            payload = await request.json()
        except Exception:
            payload = {}
        if not isinstance(payload, dict):
            payload = {}
        with get_db_session() as db:
            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
            if not token:
@@ -16,7 +16,6 @@ from pydantic import BaseModel
 from core.database import SessionLocal, CrewMember, ScheduledTask
 from src.auth_helpers import get_current_user
 from core.auth import RESERVED_USERNAMES
 from src.task_scheduler import compute_next_run
@@ -90,11 +89,11 @@ def setup_assistant_routes(task_scheduler) -> APIRouter:
    # check-in tasks seeded. Hitting any /assistant route under one of these
    # used to seed a full CrewMember + Morning/Midday/Evening tasks under that
    # owner, which then double-fired alongside the real user's check-ins.
-    # RESERVED_USERNAMES covers the same set; the `not owner` guard handles "".
+    _SYNTHETIC_OWNERS = frozenset({"internal-tool", "api", "demo", "system", ""})
    async def _get_or_create(owner: str) -> CrewMember:
        """Return the per-owner assistant CrewMember, creating it on demand."""
-        if not owner or owner in RESERVED_USERNAMES:
+        if not owner or owner in _SYNTHETIC_OWNERS:
            raise HTTPException(status_code=400, detail=f"Cannot seed assistant for {owner!r}")
        db = SessionLocal()
        try:
@@ -12,8 +12,8 @@ import re
 from pathlib import Path
 from core.atomic_io import atomic_write_json, atomic_write_text
-from core.auth import AuthManager, RESERVED_USERNAMES, SetAdminResult, TOKEN_TTL
+from core.auth import AuthManager, SetAdminResult
-from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, PASSWORD_MIN_LENGTH, SKILLS_DIR
+from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
 from src.rate_limiter import RateLimiter
 from src.settings_scrub import scrub_settings
 from src.settings import (
@@ -102,12 +102,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(429, "Too many requests — try again later")
        if auth_manager.is_configured:
            raise HTTPException(400, "Already configured")
-        if len(body.password) < PASSWORD_MIN_LENGTH:
+        if len(body.password) < 8:
-            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
+            raise HTTPException(400, "Password must be at least 8 characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = await asyncio.to_thread(auth_manager.setup, body.username, body.password)
        if not ok:
            raise HTTPException(500, "Setup failed")
@@ -122,12 +118,10 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(400, "Run setup first")
        if not auth_manager.signup_enabled:
            raise HTTPException(403, "Registration is disabled. Ask an admin for an account.")
-        if len(body.password) < PASSWORD_MIN_LENGTH:
+        if len(body.password) < 8:
-            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
+            raise HTTPException(400, "Password must be at least 8 characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = await asyncio.to_thread(auth_manager.create_user, body.username, body.password, is_admin=False)
        if not ok:
            raise HTTPException(409, "Username already taken")
@@ -150,8 +144,6 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                raise HTTPException(401, "Invalid 2FA code")
        # All checks passed — create session (password already verified above)
        token = await asyncio.to_thread(auth_manager.create_session_trusted, username)
        if not token:
            raise HTTPException(401, "Invalid credentials")
        cookie_kwargs = dict(
            key=SESSION_COOKIE,
            value=token,
@@ -161,7 +153,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            path="/",
        )
        if body.remember:
-            cookie_kwargs["max_age"] = TOKEN_TTL
+            cookie_kwargs["max_age"] = 60 * 60 * 24 * 7  # 7 days
        response.set_cookie(**cookie_kwargs)
        return {"ok": True, "username": username}
@@ -190,18 +182,13 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            pass
        return result
    @router.get("/policy")
    async def auth_policy():
        """Return public auth policy constants for the frontend."""
        return auth_manager.policy()
    @router.post("/change-password")
    async def change_password(body: ChangePasswordRequest, request: Request):
        user = _get_current_user(request)
        if not user:
            raise HTTPException(401, "Not authenticated")
-        if len(body.new_password) < PASSWORD_MIN_LENGTH:
+        if len(body.new_password) < 8:
-            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
+            raise HTTPException(400, "Password must be at least 8 characters")
        current_token = request.cookies.get(SESSION_COOKIE)
        ok = await asyncio.to_thread(auth_manager.change_password, user, body.current_password, body.new_password)
        if not ok:
@@ -281,12 +268,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        user = _get_current_user(request)
        if not user or not auth_manager.is_admin(user):
            raise HTTPException(403, "Admin only")
-        if len(body.password) < PASSWORD_MIN_LENGTH:
+        if len(body.password) < 8:
-            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
+            raise HTTPException(400, "Password must be at least 8 characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = auth_manager.create_user(body.username, body.password, body.is_admin)
        if not ok:
            raise HTTPException(409, "Username already taken")
@@ -449,23 +432,6 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        except Exception as e:
            logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
        # direct personal RAG uploads live in per-owner directories and the
        # vector metadata also carries the username used for owner-filtered
        # search. Keep both in sync with the auth rename.
        try:
            from routes.personal_routes import rename_personal_upload_owner
            personal_docs_manager = getattr(request.app.state, "personal_docs_manager", None)
            if personal_docs_manager is not None:
                rag_manager = getattr(personal_docs_manager, "rag_manager", None)
                rename_personal_upload_owner(
                    old_username,
                    new_username,
                    personal_docs_manager=personal_docs_manager,
                    rag_manager=rag_manager,
                )
        except Exception as e:
            logger.warning("Failed to rename personal RAG upload owner references %s -> %s: %s", old_username, new_username, e)
        # skills: SKILL.md frontmatter carries owner: <username>; the usage
        # sidecar (_usage.json) keys entries as owner::skill-name. Both must
        # be updated or the renamed user's Skills panel goes empty.
@@ -14,7 +14,7 @@ from core.database import Session as DBSession, ModelEndpoint
 from src.llm_core import normalize_model_id
 from src.endpoint_resolver import normalize_base
 from src.context_compactor import maybe_compact, trim_for_context
-from src.auth_helpers import effective_user
+from src.auth_helpers import get_current_user
 from src.prompt_security import untrusted_context_message
 from routes.prefs_routes import _load_for_user as load_prefs_for_user
@@ -22,47 +22,6 @@ from fastapi import HTTPException
 logger = logging.getLogger(__name__)
 _CASUAL_OPENING_RE = re.compile(
    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
    r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
    re.IGNORECASE,
 )
 _CASUAL_BLOCKLIST_RE = re.compile(
    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
    re.IGNORECASE,
 )
 def _is_casual_low_signal(text: str) -> bool:
    """Short greetings/slang should not pull memory, skills, RAG, or docs."""
    s = str(text or "").strip()
    m = _CASUAL_OPENING_RE.match(s)
    if not m:
        return False
    tail = m.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(tail):
        return False
    tail_words = re.findall(r"[A-Za-z0-9_'-]+", tail)
    return len(tail_words) <= 2
 # Strong references to in-flight fire-and-forget tasks scheduled from this
 # module. asyncio only keeps weak references to tasks created via
 # create_task, so without this the GC can collect a task mid-execution and
 # the background work (extraction, auto-naming) silently never runs.
 # Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py.
 _BG_TASKS: set[asyncio.Task] = set()
 def _spawn_bg(coro) -> asyncio.Task:
    """Schedule a background task and hold a strong reference until it finishes."""
    task = asyncio.create_task(coro)
    _BG_TASKS.add(task)
    task.add_done_callback(_BG_TASKS.discard)
    return task
 # ── Data containers ────────────────────────────────────────────────────── #
@@ -119,7 +78,7 @@ def _enforce_chat_privileges(request, sess) -> None:
    which means unrestricted allowed_models / zero cap -> no-op for them.
    """
    try:
-        user = effective_user(request)
+        user = get_current_user(request)
    except Exception:
        user = None
    if not user:
@@ -200,9 +159,17 @@ async def auto_name_session(session_manager, sess):
            return
        owner = getattr(sess, "owner", None)
-        t_url, t_model, t_headers = resolve_task_endpoint(
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
-            sess.endpoint_url, sess.model, sess.headers, owner=owner
+        if not t_model:
-        )
+            # If no task/utility model is configured at all, fall back to
            # the session's own model so auto-naming still works even on
            # minimal setups.
            from src.endpoint_resolver import resolve_endpoint
            _fallback = resolve_endpoint("default", owner=owner)
            if _fallback and _fallback[1]:
                t_url, t_model, t_headers = _fallback
            else:
                t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
        if not t_model:
            logger.debug("[auto-name] No model provided, skipping")
            return
@@ -379,11 +346,11 @@ def add_user_message(sess, chat_handler, preprocessed: PreprocessedMessage, inco
 def fire_message_event(request, webhook_manager, session_id: str, sess, message: str, compare_mode: bool = False):
    """Fire webhook and event_bus events for a new user message."""
    if webhook_manager and not compare_mode:
-        webhook_manager.fire_and_forget("chat.message", {
+        asyncio.create_task(webhook_manager.fire("chat.message", {
            "session_id": session_id, "model": sess.model, "message": message[:2000],
-        })
+        }))
    from src.event_bus import fire_event
-    user = effective_user(request)
+    user = get_current_user(request)
    fire_event("message_sent", user)
@@ -609,11 +576,9 @@ async def build_chat_context(
    if not incognito:
        fire_message_event(request, webhook_manager, session_id, sess, message, compare_mode)
-    # Resolve owner-scoped prefs/context. Browser requests keep the cookie user;
+    # Resolve user prefs
-    # bearer-token chat requests use the token owner instead of the "api" sentinel.
+    user = get_current_user(request)
    user = effective_user(request)
    uprefs = load_prefs_for_user(user)
    casual_low_signal = _is_casual_low_signal(message)
    # Memory enabled?
    mem_enabled = not incognito and not no_memory and uprefs.get("memory_enabled", True)
@@ -623,9 +588,6 @@ async def build_chat_context(
    if not allow_tool_preprocessing:
        mem_enabled = False
        skills_enabled = False
    if casual_low_signal:
        mem_enabled = False
        skills_enabled = False
    logger.debug(
        "Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
        mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
@@ -641,11 +603,11 @@ async def build_chat_context(
    # Use RAG?
    use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
-    if incognito or not allow_tool_preprocessing or is_research_spinoff or casual_low_signal:
+    if incognito or not allow_tool_preprocessing or is_research_spinoff:
        use_rag_val = False
    # If pre-fetched search context was provided (compare mode), skip live web search
-    skip_web = bool(search_context) or not allow_tool_preprocessing or casual_low_signal
+    skip_web = bool(search_context) or not allow_tool_preprocessing
    # Build context preface
    # The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -664,7 +626,7 @@ async def build_chat_context(
        incognito=incognito,
        use_skills=skills_enabled,
    )
-    if use_rag is not None or is_research_spinoff or casual_low_signal:
+    if use_rag is not None or is_research_spinoff:
        _preface_kwargs["use_rag"] = use_rag_val
    preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -672,7 +634,7 @@ async def build_chat_context(
    used_memories = getattr(chat_processor, '_last_used_memories', [])
    # Inject pre-fetched search context (compare mode)
-    if search_context and allow_tool_preprocessing and not casual_low_signal:
+    if search_context and allow_tool_preprocessing:
        preface.append(untrusted_context_message("prefetched search context", search_context))
    # YouTube transcripts
@@ -1150,7 +1112,7 @@ def run_post_response_tasks(
            )))
    if _extraction_jobs:
-        _spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
+        asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
    # Token accumulation
    if last_metrics:
@@ -1158,11 +1120,11 @@ def run_post_response_tasks(
    # Webhook
    if webhook_manager and not compare_mode:
-        webhook_manager.fire_and_forget("chat.completed", {
+        asyncio.create_task(webhook_manager.fire("chat.completed", {
            "session_id": session_id, "model": sess.model,
            "user_message": message, "response": full_response[:2000],
-        })
+        }))
    # Auto-name
    if needs_auto_name(sess.name):
-        _spawn_bg(auto_name_session(session_manager, sess))
+        asyncio.create_task(auto_name_session(session_manager, sess))
@@ -23,7 +23,7 @@ from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_
 from src.session_search import search_session_messages
 from src.prompt_security import untrusted_context_message
 from core.exceptions import SessionNotFoundError
-from src.auth_helpers import effective_user, get_current_user
+from src.auth_helpers import get_current_user
 from routes.session_routes import _verify_session_owner
 from routes.document_helpers import _owner_session_filter
 from core.database import SessionLocal, get_session_mode, set_session_mode
@@ -126,8 +126,7 @@ def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool:
        sess.model = ""
        sess.headers = {}
        return True
-    except Exception as e:
+    except Exception:
        logger.warning("Failed to clear orphaned session endpoint", exc_info=e)
        db.rollback()
        return False
    finally:
@@ -145,8 +144,7 @@ def _endpoint_cache_contains_model(endpoint, model: str) -> bool:
        return True
    try:
        models = json.loads(raw) if isinstance(raw, str) else raw
-    except Exception as e:
+    except Exception:
        logger.warning("Failed to parse cached models list, treating as containing model", exc_info=e)
        return True
    if not isinstance(models, list) or not models:
        return True
@@ -238,8 +236,7 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
                is_chatgpt_subscription = False
        try:
            cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or [])
-        except Exception as e:
+        except Exception:
            logger.warning("Failed to parse cached_models for endpoint %r", getattr(ep, "id", "?"), exc_info=e)
            cached = []
        if not cached:
            visible = []
@@ -363,7 +360,7 @@ def setup_chat_routes(
            sess = session_manager.get_session(session)
        except KeyError:
            raise HTTPException(404, f"Session '{session}' not found")
-        owner = effective_user(request)
+        owner = get_current_user(request)
        if _clear_orphaned_session_endpoint(sess, owner=owner):
            raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
@@ -603,7 +600,7 @@ def setup_chat_routes(
            # but BEFORE loading. Prevents cross-user session hijack.
            _verify_session_owner(request, session)
            sess = session_manager.get_session(session)
-            owner = effective_user(request)
+            owner = get_current_user(request)
            if _clear_orphaned_session_endpoint(sess, owner=owner):
                raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
            # Issue #587: picker shows a model from the endpoint cache but
@@ -634,7 +631,7 @@ def setup_chat_routes(
        _enforce_chat_privileges(request, sess)
        # Ensure session has auth headers
-        resolve_session_auth(sess, session, owner=effective_user(request))
+        resolve_session_auth(sess, session, owner=get_current_user(request))
        # Check for research_pending BEFORE mode persist overwrites it
        do_research = str(use_research).lower() == "true"
@@ -649,8 +646,8 @@ def setup_chat_routes(
        elif attachments:
            try:
                att_ids = [str(x) for x in json.loads(attachments)]
-            except Exception as e:
+            except Exception:
-                logger.warning("Failed to parse attachments JSON, ignoring attachments", exc_info=e)
+                pass
        no_memory = str(form_data.get("no_memory", "")).lower() == "true"
        pre_context_tool_policy = build_effective_tool_policy(
@@ -829,10 +826,6 @@ def setup_chat_routes(
        from src.settings import get_setting
        _global_disabled = get_setting("disabled_tools", [])
        if _global_disabled and isinstance(_global_disabled, list):
            explicit_web_allowed = allow_web_search is not None and str(allow_web_search).lower() == "true"
            if explicit_web_allowed:
                disabled_tools.update(t for t in _global_disabled if t not in {"web_search", "web_fetch"})
            else:
            disabled_tools.update(_global_disabled)
        # Light auto-escalation: the user is in chat mode and just expressed a
@@ -1263,10 +1256,6 @@ def setup_chat_routes(
                        _max_rounds = _DEFAULT_ROUNDS
                    _max_rounds = max(1, min(_max_rounds, 200))
                    _forced_tools = None
                    if allow_web_search is not None and str(allow_web_search).lower() == "true":
                        _forced_tools = {"web_search", "web_fetch"}
                    async for chunk in stream_agent_loop(
                        sess.endpoint_url,
                        sess.model,
@@ -1288,7 +1277,6 @@ def setup_chat_routes(
                        plan_mode=plan_mode,
                        approved_plan=approved_plan or None,
                        workspace=workspace or None,
                        forced_tools=_forced_tools,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                            try:
@@ -1309,6 +1297,8 @@ def setup_chat_routes(
                                    "doc_stream_open", "doc_stream_delta",
                                    "doc_update", "doc_suggestions", "ui_control",
                                    "rounds_exhausted",
                                    "loop_breaker_triggered",
                                    "intent_nudge_exhausted",
                                    "ask_user",
                                    "plan_update",
                                ):
@@ -1494,7 +1484,7 @@ def setup_chat_routes(
        if not q or not q.strip():
            return []
-        _user = effective_user(request)
+        _user = get_current_user(request)
        return [
            result.to_dict()
            for result in search_session_messages(
@@ -46,12 +46,8 @@ def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
    shell metacharacters in ``remoteHost`` is rejected with 400 rather than
    injected.
    """
-    raw_host = task.get("remoteHost")
+    host = validate_remote_host((task.get("remoteHost") or "").strip() or None) or ""
-    raw_port = task.get("sshPort")
+    ssh_port = validate_ssh_port((task.get("sshPort") or "").strip() or None) or ""
    host_value = str(raw_host).strip() if raw_host is not None else None
    port_value = str(raw_port).strip() if raw_port is not None else None
    host = validate_remote_host(host_value or None) or ""
    ssh_port = validate_ssh_port(port_value or None) or ""
    port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
    return host, port_flag
@@ -310,10 +306,7 @@ def setup_codex_routes(
    @router.post("/emails/draft-document")
    async def codex_email_draft_document(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
-        owner = _scope_owner(request, EMAIL_DRAFT_SCOPES)
+        owner = _scope_owner_all(request, {"email:draft", "documents:write"})
        docs_owner = _scope_owner_all(request, DOCS_WRITE_SCOPES)
        if docs_owner != owner:
            raise HTTPException(403, "API token owner mismatch")
        if documents_create_endpoint is None:
            raise HTTPException(503, "Documents integration is not available")
        from routes.document_routes import DocumentCreate
@@ -505,8 +505,6 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
        "    if u.startswith('KB'): return int(n * 1024)",
        "    return int(n)",
        "def scan_ollama():",
        "    if any(m.get('is_ollama') for m in models): return",
        "    if os.name == 'nt' and not os.environ.get('ODYSSEUS_ALLOW_OLLAMA_CLI_SCAN'): return",
        "    if not shutil.which('ollama'): return",
        "    try:",
        "        p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
@@ -537,8 +535,8 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
        "            models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        "        return",
        "for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)",
        "scan_ollama_api()",
        "scan_ollama()",
        "scan_ollama_api()",
    ]
    for model_dir in model_dirs or []:
        lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
@@ -786,149 +784,25 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and
    fail with "CUDA Toolkit not found" instead of building with HIP.
    """
    # Try a prebuilt binary from llama.cpp's GitHub releases FIRST — no
    # cmake/build-essential/git/CUDA-headers needed at all. The from-source
    # build below stays as a fallback (custom flags, esoteric arch, no
    # internet, etc). 30 seconds vs 5+ minutes of compile, and removes
    # every OS-package dep from the launch path. Sets _odysseus_have_prebuilt=1
    # on success; the existing build-tier if/elif chain below is gated on
    # that variable so we never compile twice or shadow the prebuilt symlink.
    runner_lines.append('    _odysseus_have_prebuilt=""')
    runner_lines.append('    _odysseus_arch="$(uname -m)"')
    runner_lines.append('    _odysseus_prebuilt_url=""')
    runner_lines.append('    if command -v curl >/dev/null 2>&1 && [ "$_odysseus_arch" = "x86_64" ]; then')
    runner_lines.append('      _odysseus_pat=""')
    runner_lines.append('      _odysseus_has_nv_inline() { command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU "; }')
    runner_lines.append('      _odysseus_has_vk_inline() { ldconfig -p 2>/dev/null | grep -q "libvulkan\\.so" || command -v vulkaninfo >/dev/null 2>&1 || [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ]; }')
    runner_lines.append('      _odysseus_has_vkdev_inline() { ls /dev/dri/renderD* >/dev/null 2>&1 || (lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\'); }')
    runner_lines.append('      if _odysseus_has_nv_inline; then')
    runner_lines.append('        _odysseus_pat="ubuntu.*cuda"')
    runner_lines.append('      elif _odysseus_has_vkdev_inline && _odysseus_has_vk_inline; then')
    runner_lines.append('        _odysseus_pat="ubuntu.*vulkan"')
    runner_lines.append('      else')
    runner_lines.append('        _odysseus_pat="ubuntu-x64\\\\.zip"')
    runner_lines.append('      fi')
    runner_lines.append('      _odysseus_prebuilt_url="$(curl -fsSL --max-time 15 https://api.github.com/repos/ggml-org/llama.cpp/releases/latest 2>/dev/null | grep \'"browser_download_url"\' | cut -d\'"\' -f4 | grep -iE "$_odysseus_pat" | grep -iv "arm\\|aarch64" | head -1)"')
    runner_lines.append('    fi')
    # Accept any of unzip / bsdtar / python3 -m zipfile as the extractor.
    # python3 is essentially always present on modern Linux, so this lets
    # the prebuilt path work on minimal Ubuntu installs that lack `unzip`.
    runner_lines.append('    if [ -n "$_odysseus_prebuilt_url" ] && (command -v unzip >/dev/null 2>&1 || command -v bsdtar >/dev/null 2>&1 || command -v python3 >/dev/null 2>&1); then')
    runner_lines.append('      echo "[odysseus] Found prebuilt llama-server: $_odysseus_prebuilt_url"')
    runner_lines.append('      mkdir -p ~/bin "$HOME/.cache/odysseus/llama-cpp-prebuilt" && cd "$HOME/.cache/odysseus/llama-cpp-prebuilt"')
    runner_lines.append('      rm -f llama-cpp.zip')
    runner_lines.append('      if curl -fsSL --max-time 120 "$_odysseus_prebuilt_url" -o llama-cpp.zip && [ -s llama-cpp.zip ]; then')
    runner_lines.append('        rm -rf build && mkdir -p build')
    runner_lines.append('        if command -v unzip >/dev/null 2>&1; then unzip -qq -o llama-cpp.zip -d build; elif command -v bsdtar >/dev/null 2>&1; then bsdtar -xf llama-cpp.zip -C build; else python3 -c "import zipfile; zipfile.ZipFile(\\"llama-cpp.zip\\").extractall(\\"build\\")"; fi')
    runner_lines.append('        _odysseus_extracted="$(find build -type f -name llama-server 2>/dev/null | head -1)"')
    runner_lines.append('        if [ -n "$_odysseus_extracted" ]; then')
    runner_lines.append('          chmod +x "$_odysseus_extracted"')
    runner_lines.append('          ln -sf "$_odysseus_extracted" ~/bin/llama-server')
    runner_lines.append('          _odysseus_libdir="$(dirname "$_odysseus_extracted")"')
    runner_lines.append('          mkdir -p ~/.config && echo "export LD_LIBRARY_PATH=\\"$_odysseus_libdir:\\${LD_LIBRARY_PATH:-}\\"" > ~/.config/odysseus-llama-cpp-env')
    runner_lines.append('          _odysseus_have_prebuilt=1')
    runner_lines.append('          echo "[odysseus] Prebuilt llama-server installed at $_odysseus_extracted"')
    runner_lines.append('        fi')
    runner_lines.append('      fi')
    runner_lines.append('      [ -z "$_odysseus_have_prebuilt" ] && echo "[odysseus] Prebuilt download/extract failed — falling back to from-source build."')
    runner_lines.append('    elif [ -z "$_odysseus_prebuilt_url" ]; then')
    runner_lines.append('      echo "[odysseus] No matching prebuilt llama-server for this host (arch=$_odysseus_arch) — will build from source."')
    runner_lines.append('    fi')
    runner_lines.append('  if [ -z "$_odysseus_have_prebuilt" ]; then')
    # Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH
-    # so cmake's CUDA configure can find it — BUT only when actual NVIDIA
+    # so cmake's CUDA configure can find it. We keep this after the ROCm/HIP
-    # hardware is present. On AMD/Intel hosts the pip nvcc is a misleading
+    # check — a machine with both stacks should honor the native HIP toolchain on
-    # leftover (no libcudart, no GPU it could target) and would otherwise
+    # AMD hosts instead of accidentally preferring a stray nvcc wheel.
    # send the build down the CUDA branch and fail with "CUDA Toolkit not
    # found" instead of trying Vulkan.
    runner_lines.append('    _odysseus_has_nvidia_hw() {')
    runner_lines.append('      command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && return 0')
    runner_lines.append('      ls /dev/nvidia* >/dev/null 2>&1 && return 0')
    runner_lines.append('      lspci 2>/dev/null | grep -iE \'VGA|3D|Display\' | grep -iq nvidia && return 0')
    runner_lines.append('      return 1')
    runner_lines.append('    }')
    runner_lines.append('    if _odysseus_has_nvidia_hw; then')
    runner_lines.append('    for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
    runner_lines.append('      [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
    runner_lines.append('    done')
    runner_lines.append('    fi')
    # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
    # or HIP attempt) doesn't cause the next configure to reuse stale settings.
    runner_lines.append('    mkdir -p ~/bin')
-    # Try to install cmake / build-essential / git automatically before the
+    runner_lines.append('    cd ~/llama.cpp && rm -rf build')
    # build, but ONLY via passwordless sudo (`sudo -n`) — interactive sudo
    # would hang a tmux-backgrounded serve task waiting for a password. If
    # sudo asks for a password the install is skipped silently and the
    # diagnosis pattern (cookbook_routes.py / cookbook_helpers.py) surfaces
    # an explicit "install cmake" suggestion in the Cookbook diagnosis
    # toolbar after the inevitable build failure.
    runner_lines.append('    _odysseus_apt_bootstrap() {')
    runner_lines.append('      local _missing=""')
    runner_lines.append('      command -v cmake >/dev/null 2>&1 || _missing="$_missing cmake"')
    runner_lines.append('      command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _missing="$_missing build-essential"')
    runner_lines.append('      command -v git >/dev/null 2>&1 || _missing="$_missing git"')
    runner_lines.append('      [ -z "$_missing" ] && return 0')
    runner_lines.append('      if command -v apt-get >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
    runner_lines.append('        echo "[odysseus] Auto-installing missing build deps via apt:$_missing"')
    runner_lines.append('        sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq 2>&1 | tail -3')
    runner_lines.append('        sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends $_missing 2>&1 | tail -5 || true')
    runner_lines.append('      elif command -v pacman >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
    runner_lines.append('        echo "[odysseus] Auto-installing missing build deps via pacman:$_missing"')
    runner_lines.append('        local _pacpkgs="$(echo "$_missing" | sed -e \'s/build-essential/base-devel/g\')"')
    runner_lines.append('        sudo -n pacman -Sy --needed --noconfirm $_pacpkgs 2>&1 | tail -5 || true')
    runner_lines.append('      elif command -v dnf >/dev/null 2>&1 && sudo -n true 2>/dev/null; then')
    runner_lines.append('        echo "[odysseus] Auto-installing missing build deps via dnf:$_missing"')
    runner_lines.append('        local _dnfpkgs="$(echo "$_missing" | sed -e \'s/build-essential/gcc gcc-c++ make/g\')"')
    runner_lines.append('        sudo -n dnf install -y $_dnfpkgs 2>&1 | tail -5 || true')
    runner_lines.append('      else')
    runner_lines.append('        echo "[odysseus] WARNING: missing build deps ($_missing) — passwordless sudo is unavailable, cannot auto-install. Cookbook Diagnosis will explain the fix after the build fails."')
    runner_lines.append('      fi')
    runner_lines.append('    }')
    runner_lines.append('    _odysseus_apt_bootstrap')
    runner_lines.append('    _odysseus_missing_build_deps=""')
    runner_lines.append('    command -v cmake >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps cmake"')
    runner_lines.append('    command -v git >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps git"')
    runner_lines.append('    command -v g++ >/dev/null 2>&1 || command -v gcc >/dev/null 2>&1 || _odysseus_missing_build_deps="$_odysseus_missing_build_deps build-essential"')
    runner_lines.append('    if [ -n "$_odysseus_missing_build_deps" ]; then')
    runner_lines.append('      echo "ERROR: llama.cpp source build needs missing packages:$_odysseus_missing_build_deps"')
    runner_lines.append('      if command -v apt-get >/dev/null 2>&1; then')
    runner_lines.append('        echo "Install on this host: sudo apt-get update && sudo apt-get install -y cmake build-essential git"')
    runner_lines.append('      elif command -v pacman >/dev/null 2>&1; then')
    runner_lines.append('        echo "Install on this host: sudo pacman -Sy --needed cmake base-devel git"')
    runner_lines.append('      elif command -v dnf >/dev/null 2>&1; then')
    runner_lines.append('        echo "Install on this host: sudo dnf install -y cmake gcc gcc-c++ make git"')
    runner_lines.append('      fi')
    runner_lines.append('      echo "Alternative: install a native llama-server on PATH, then relaunch."')
    runner_lines.append('      ODYSSEUS_PREFLIGHT_EXIT=127')
    runner_lines.append('    fi')
    runner_lines.append('    cd ~/llama.cpp')
    runner_lines.append('    _odysseus_has_vulkan() {')
    runner_lines.append('      ldconfig -p 2>/dev/null | grep -q \'libvulkan\\.so\' && return 0')
    runner_lines.append('      [ -e /usr/lib/libvulkan.so.1 ] && return 0')
    runner_lines.append('      [ -e /usr/lib/x86_64-linux-gnu/libvulkan.so.1 ] && return 0')
    runner_lines.append('      command -v vulkaninfo >/dev/null 2>&1 && return 0')
    runner_lines.append('      return 1')
    runner_lines.append('    }')
    runner_lines.append('    _odysseus_has_vulkan_device() {')
    runner_lines.append('      ls /dev/dri/renderD* >/dev/null 2>&1 && return 0')
    runner_lines.append('      lspci 2>/dev/null | grep -Ei \'VGA|3D|Display\' | grep -Eiq \'AMD|ATI|Radeon\' && return 0')
    runner_lines.append('      return 1')
    runner_lines.append('    }')
    # Backend preference: native ROCm/HIP > native CUDA > Vulkan > CPU.
    # Vulkan is a portable fallback that works on AMD when ROCm isn't
    # installed (e.g. Strix Halo) and on any vendor's discrete GPU, but
    # it's ~30-40% slower than native HIP/CUDA for LLM inference — only
    # pick it when no native toolchain is present.
    runner_lines.append('    if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
    runner_lines.append('      rm -rf build')
    runner_lines.append('      if command -v hipconfig &>/dev/null; then')
    runner_lines.append('        export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"')
    runner_lines.append('        export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"')
    runner_lines.append('      fi')
    runner_lines.append('      echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
-    runner_lines.append('    elif command -v nvcc &>/dev/null && _odysseus_has_nvidia_hw; then')
+    runner_lines.append('    elif command -v nvcc &>/dev/null; then')
    runner_lines.append('      rm -rf build')
    # nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
    # tooling can expose nvcc without shipping libcudart, causing cmake to fail
    # mid-build with "CUDA runtime library not found". Check cudart explicitly
@@ -952,50 +826,31 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    runner_lines.append('        echo "[odysseus]   Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
    runner_lines.append('        cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
    runner_lines.append('      fi')
    runner_lines.append('    elif _odysseus_has_vulkan_device && _odysseus_has_vulkan; then')
    runner_lines.append('      echo "[odysseus] Vulkan-capable GPU detected (no ROCm/CUDA toolchain installed) — building llama-server with Vulkan support..."')
    runner_lines.append('      rm -rf build-vulkan')
    runner_lines.append('      cmake -B build-vulkan -DCMAKE_BUILD_TYPE=Release -DGGML_VULKAN=ON && cmake --build build-vulkan -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build-vulkan/bin/llama-server ~/bin/llama-server')
    runner_lines.append('    else')
-    runner_lines.append('      echo "[odysseus] WARNING: no HIP/CUDA/Vulkan toolchain found — building llama-server for CPU only."')
+    runner_lines.append('      echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
    runner_lines.append('      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
-    runner_lines.append('      echo "[odysseus]   Install Vulkan (libvulkan-dev) / ROCm for AMD GPUs or CUDA tooling for NVIDIA, then re-launch this serve task."')
+    runner_lines.append('      echo "[odysseus]   Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."')
    runner_lines.append('      rm -rf build')
    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
    runner_lines.append('    fi')
    runner_lines.append('  fi  # end _odysseus_have_prebuilt guard')
-def _llama_cpp_rebuild_cmd(update_source: bool = False) -> str:
+def _llama_cpp_rebuild_cmd() -> str:
    """Shell command that clears the Cookbook-managed llama.cpp build.
-    Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build*``
+    Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build``
    directory so the next llama.cpp serve recompiles from source, picking up a
    CUDA or HIP toolchain if one is now available. The serve bootstrap only
    builds when ``llama-server`` is missing from PATH, so without this an
-    existing CPU-only build is reused forever. When ``update_source`` is true,
+    existing CPU-only build is reused forever. It deliberately installs and
-    the command also fast-forwards the Cookbook-managed ``~/llama.cpp`` checkout
+    downloads nothing; the rebuild itself happens on the next serve.
    if it exists. The rebuild itself happens on the next serve.
    """
    update_cmd = ''
    if update_source:
        update_cmd = (
            'if [ -d "$HOME/llama.cpp/.git" ]; then '
            'git -C "$HOME/llama.cpp" pull --ff-only --depth 1 || '
            'echo "[odysseus] WARNING: llama.cpp source update failed; clearing cached build anyway."; '
            'elif command -v git >/dev/null 2>&1; then '
            'git clone --depth 1 https://github.com/ggml-org/llama.cpp "$HOME/llama.cpp" || '
            'echo "[odysseus] WARNING: llama.cpp clone failed; clearing cached build anyway."; '
            'fi && '
        )
    return (
        'mkdir -p "$HOME/bin" && '
        f'{update_cmd}'
        'rm -f "$HOME/bin/llama-server" && '
-        'rm -rf "$HOME/llama.cpp/build" "$HOME/llama.cpp/build-vulkan" && '
+        'rm -rf "$HOME/llama.cpp/build" && '
        'echo "[odysseus] Cleared the cached llama.cpp build. '
        'Re-launch the serve task to rebuild llama-server from source '
-        '(Vulkan, HIP, or CUDA will be used if a matching toolchain is now available)."'
+        '(CUDA or HIP will be used if a toolchain is now available)."'
    )
@@ -1258,27 +1113,8 @@ def _diagnose_serve_output(text: str) -> dict | None:
            "SGLang is not installed or not in PATH on this server.",
            [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
        ),
        # System build deps come BEFORE the generic llama.cpp catch-all so
        # cmake / build-essential / git missing → a specific OS-package
        # remediation instead of "install llama-cpp-python[server]" (which
        # itself fails to compile when cmake is absent).
        (
-            r"cmake: command not found|cmake.*not found.*[Cc]ould not",
+            r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
            "cmake is required to build llama.cpp from source but isn't installed on this server.",
            [{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
        ),
        (
            r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
            "A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
            [{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
        ),
        (
            r"^git: command not found",
            "git is required to clone the llama.cpp source tree.",
            [{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
        ),
        (
            r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
            "llama.cpp / llama-cpp-python dependencies are missing.",
            [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
        ),
@@ -189,27 +189,8 @@ def setup_cookbook_routes() -> APIRouter:
                "SGLang is not installed or not in PATH on this server.",
                [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
            ),
            # System build deps come BEFORE the generic llama.cpp catch-all
            # so cmake / build-essential / git missing → a specific OS-package
            # remediation instead of "install llama-cpp-python[server]" (which
            # itself fails to compile when cmake is absent).
            (
-                r"cmake: command not found|cmake.*not found.*[Cc]ould not",
+                r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
                "cmake is required to build llama.cpp from source but isn't installed on this server.",
                [{"label": "install build deps for llama.cpp (apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git)", "op": "dependency", "package": "llama-cpp-python[server]"}],
            ),
            (
                r"^(make|g\+\+|gcc): command not found|Could not find C\+\+ compiler",
                "A C/C++ compiler (build-essential) is required to build llama.cpp from source.",
                [{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
            ),
            (
                r"^git: command not found",
                "git is required to clone the llama.cpp source tree.",
                [{"label": "install build deps for llama.cpp on this server", "op": "dependency", "package": "llama-cpp-python[server]"}],
            ),
            (
                r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'",
                "llama.cpp / llama-cpp-python dependencies are missing.",
                [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
            ),
@@ -273,79 +254,6 @@ def setup_cookbook_routes() -> APIRouter:
    def _load_stored_hf_token() -> str:
        """Return the result of ``load_stored_hf_token`` for the cookbook state path.

        Thin zero-argument wrapper: it forwards the ``_cookbook_state_path``
        visible in this scope as the ``state_path`` keyword argument.
        """
        stored_token = load_stored_hf_token(state_path=_cookbook_state_path)
        return stored_token
    def _normalize_minimax_m3_vllm_cmd(cmd: str) -> str:
        """Patch MiniMax M3 vLLM launches into the known-good local form.
        The browser form can be stale or omit advanced-only fields. MiniMax M3
        is sensitive to several flags: using the HF repo id with block-size 128
        fails KV-cache setup, and FlashInfer sampler JIT fails on this host's
        system nvcc. Normalize server-side before writing the tmux runner.
        """
        cmd_lower = (cmd or "").lower()
        if not cmd or "vllm serve" not in cmd_lower or "minimax" not in cmd_lower or "m3" not in cmd_lower:
            return cmd
        try:
            parts = shlex.split(cmd)
        except ValueError:
            return cmd
        if "serve" not in parts:
            return cmd
        env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
        env_parts = [p for p in parts if env_re.match(p)]
        body = [p for p in parts if not env_re.match(p)]
        try:
            serve_i = body.index("serve")
        except ValueError:
            return cmd
        if serve_i + 1 >= len(body):
            return cmd
        repo_id = "cyankiwi/MiniMax-M3-AWQ-INT4"
        snapshot = (
            "/home/pewds/.cache/huggingface/hub/"
            "models--cyankiwi--MiniMax-M3-AWQ-INT4/"
            "snapshots/4082acbbec1236d21828d55b6bb0fe02ade4ab5b"
        )
        if body[serve_i + 1] == repo_id:
            body[serve_i + 1] = snapshot
        def add_env(key: str, value: str) -> None:
            if not any(p.startswith(f"{key}=") for p in env_parts):
                env_parts.append(f"{key}={value}")
        def has_flag(flag: str) -> bool:
            return any(p == flag or p.startswith(flag + "=") for p in body)
        def set_flag(flag: str, value: str) -> None:
            for i, part in enumerate(body):
                if part == flag:
                    if i + 1 < len(body):
                        body[i + 1] = value
                    else:
                        body.append(value)
                    return
                if part.startswith(flag + "="):
                    body[i] = f"{flag}={value}"
                    return
            body.extend([flag, value])
        def add_bool(flag: str) -> None:
            if not has_flag(flag):
                body.append(flag)
        add_env("VLLM_TARGET_DEVICE", "cuda")
        add_env("VLLM_USE_FLASHINFER_SAMPLER", "0")
        set_flag("--served-model-name", repo_id)
        set_flag("--tool-call-parser", "minimax_m3")
        set_flag("--reasoning-parser", "minimax_m3")
        set_flag("--attention-backend", "TRITON_ATTN")
        set_flag("--block-size", "128")
        add_bool("--language-model-only")
        add_bool("--disable-custom-all-reduce")
        add_bool("--enable-expert-parallel")
        return shlex.join(env_parts + body)
    def _cookbook_ssh_dir() -> Path:
        # The Docker image keeps cookbook keys under /app/.ssh; that path only
        # exists inside the container. On Windows (and any non-container host)
@@ -1322,7 +1230,6 @@ def setup_cookbook_routes() -> APIRouter:
        # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
        req.cmd = _validate_serve_cmd(req.cmd) or ""
        req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
        req.cmd = _normalize_minimax_m3_vllm_cmd(req.cmd)
        req.cmd = _venv_safe_local_pip_install_cmd(
            req.cmd,
            local=not bool(req.remote_host),
@@ -1336,16 +1243,8 @@ def setup_cookbook_routes() -> APIRouter:
            req.cmd = _pip_install_no_cache(req.cmd)
            # Accept common aliases and enforce server extras for llama-cpp so
            # `python -m llama_cpp.server` has all runtime dependencies.
-            # CRITICAL: the lookbehind / lookahead must also exclude `/` so
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama_cpp(?![A-Za-z0-9_.-])", "llama-cpp-python[server]", req.cmd)
-            # the regex DOESN'T mangle a URL path like
+            req.cmd = re.sub(r"(?<![A-Za-z0-9_.-])llama-cpp-python(?!\[)", "llama-cpp-python[server]", req.cmd)
            #   https://abetlen.github.io/llama-cpp-python/whl/cu124
            # The previous regex turned that URL into
            #   https://abetlen.github.io/llama-cpp-python[server]/whl/cu124
            # which pip then couldn't resolve → silent fallback to source
            # build of the .tar.gz → CPU-only binary (because CMAKE_ARGS
            # isn't set), defeating the entire purpose of the CUDA index.
            req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama_cpp(?![A-Za-z0-9_.\-/])", "llama-cpp-python[server]", req.cmd)
            req.cmd = re.sub(r"(?<![A-Za-z0-9_.\-/])llama-cpp-python(?![\[/])", "llama-cpp-python[server]", req.cmd)
            if "llama-cpp-python" in req.cmd and "--extra-index-url" not in req.cmd:
                req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
            # PEP-508-style package spec — letters, digits, `.-_` for the
@@ -1385,11 +1284,6 @@ def setup_cookbook_routes() -> APIRouter:
        # LOCAL execution on a native-Windows host never uses tmux (detached
        # process path below), regardless of the UI-supplied platform.
        local_windows = IS_WINDOWS and not remote
        if is_windows and remote and "diffusion_server.py" in req.cmd:
            raise HTTPException(
                400,
                "Remote Windows Diffusers serving is not supported yet; use local Windows or a Linux remote server.",
            )
        if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port):
            return {
@@ -1532,69 +1426,6 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('  else')
                _append_llama_cpp_linux_accel_build_lines(runner_lines)
                runner_lines.append('  fi')
                # Source the env file the prebuilt-download path writes so
                # LD_LIBRARY_PATH includes the directory holding libllama.so
                # and friends. No-op when prebuilt wasn't used.
                runner_lines.append('  [ -r ~/.config/odysseus-llama-cpp-env ] && . ~/.config/odysseus-llama-cpp-env')
                # Auto-upgrade pip llama-cpp-python to the CUDA-enabled
                # wheel when (a) NVIDIA hardware is present and (b) the
                # currently-installed wheel is CPU-only. Without this the
                # user gets the Python server happily running at 3 tok/s
                # because pip's default index ships CPU-only wheels.
                # Forward-compat: cu124 wheels work on driver/runtime
                # 12.4+ including the cu13.x line.
                runner_lines.append('  if command -v nvidia-smi >/dev/null 2>&1 && nvidia-smi -L 2>/dev/null | grep -q "GPU " && python3 -c "import llama_cpp" 2>/dev/null; then')
                runner_lines.append('    if ! python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
                runner_lines.append('      echo "[odysseus] NVIDIA detected but installed llama-cpp-python is CPU-only — reinstalling with CUDA wheel index for GPU offload..."')
                runner_lines.append('      python3 -m pip install --user --break-system-packages --force-reinstall --no-cache-dir "llama-cpp-python[server]" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 2>&1 | tail -8 || echo "[odysseus] WARNING: CUDA wheel reinstall failed — Python server will stay CPU-only (slow). Manual fix: pip install --user --force-reinstall \'llama-cpp-python[server]\' --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124"')
                runner_lines.append('      if python3 -c "import llama_cpp; import sys; sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" 2>/dev/null; then')
                runner_lines.append('        echo "[odysseus] llama-cpp-python now supports GPU offload."')
                runner_lines.append('      fi')
                runner_lines.append('    fi')
                runner_lines.append('  fi')
                # SHORT-CIRCUIT before the build/pip fallback: if the
                # native binary is missing but llama_cpp Python is already
                # installed, drop a wrapper at ~/bin/llama-server that
                # translates llama-server CLI args to llama_cpp.server's
                # underscore-style flags. The user's serve command stays
                # `llama-server ...` and "just works" — no build, no cmake,
                # no second install. This is the path that unblocks every
                # remote where pip-installed llama-cpp-python is already
                # working but Cookbook used to insist on a native binary.
                runner_lines.append('  if ! command -v llama-server >/dev/null 2>&1 && python3 -c "import llama_cpp" 2>/dev/null; then')
                runner_lines.append('    mkdir -p ~/bin')
                runner_lines.append('    cat > ~/bin/llama-server <<\'_ODY_LLAMA_SHIM_EOF\'')
                runner_lines.append('#!/usr/bin/env bash')
                runner_lines.append('# Auto-generated by Odysseus Cookbook: a `llama-server` lookalike')
                runner_lines.append('# that translates the native CLI to `python -m llama_cpp.server`.')
                runner_lines.append('# Lets cookbook-generated launch commands run unchanged on hosts')
                runner_lines.append('# where only the pip llama-cpp-python package is installed.')
                runner_lines.append('ARGS=()')
                runner_lines.append('while [ $# -gt 0 ]; do')
                runner_lines.append('  case "$1" in')
                runner_lines.append('    -ngl|--gpu-layers|--n-gpu-layers) ARGS+=(--n_gpu_layers "$2"); shift 2 ;;')
                runner_lines.append('    -c|--ctx-size) ARGS+=(--n_ctx "$2"); shift 2 ;;')
                runner_lines.append('    -b|--batch-size) ARGS+=(--n_batch "$2"); shift 2 ;;')
                runner_lines.append('    -ub|--ubatch-size) shift 2 ;;  # llama-cpp-python has no separate ubatch')
                runner_lines.append('    --flash-attn) ARGS+=(--flash_attn true); shift 2 ;;')
                runner_lines.append('    --cache-type-k) ARGS+=(--type_k "$2"); shift 2 ;;')
                runner_lines.append('    --cache-type-v) ARGS+=(--type_v "$2"); shift 2 ;;')
                runner_lines.append('    --n-cpu-moe) ARGS+=(--n_cpu_moe "$2"); shift 2 ;;')
                runner_lines.append('    --mmproj) ARGS+=(--clip_model_path "$2"); shift 2 ;;')
                runner_lines.append('    --image-max-tokens) shift 2 ;;  # native-only')
                runner_lines.append('    --no-mmap) ARGS+=(--no_mmap true); shift ;;')
                runner_lines.append('    --no-warmup) shift ;;  # native-only')
                runner_lines.append('    --chat-template) ARGS+=(--chat_format "$2"); shift 2 ;;')
                runner_lines.append('    --fit|--split-mode|--tensor-split|--main-gpu|--parallel) shift 2 ;;  # native-only')
                runner_lines.append('    --mlock) ARGS+=(--use_mlock true); shift ;;')
                runner_lines.append('    *) ARGS+=("$1"); shift ;;')
                runner_lines.append('  esac')
                runner_lines.append('done')
                runner_lines.append('exec python3 -m llama_cpp.server "${ARGS[@]}"')
                runner_lines.append('_ODY_LLAMA_SHIM_EOF')
                runner_lines.append('    chmod +x ~/bin/llama-server')
                runner_lines.append('    echo "[odysseus] Created llama-server shim → python -m llama_cpp.server (no native binary needed)"')
                runner_lines.append('  fi')
                runner_lines.append('  # If the native build failed, fall back to the Python bindings.')
                runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
                runner_lines.append('    echo "llama-server build failed — installing Python bindings as fallback..."')
@@ -1658,96 +1489,6 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('  echo "ERROR: vLLM is not installed."')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')
                runner_lines.append(f"ODYSSEUS_SERVE_CMD='{_bash_squote(req.cmd)}'")
                runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
                runner_lines.append('  ODYSSEUS_VLLM_HELP_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
                runner_lines.append('import shlex, sys')
                runner_lines.append('parts = shlex.split(sys.argv[1])')
                runner_lines.append('try:')
                runner_lines.append('    serve_i = parts.index("serve")')
                runner_lines.append('except ValueError:')
                runner_lines.append('    print("vllm serve --help")')
                runner_lines.append('else:')
                runner_lines.append('    print(shlex.join(parts[:serve_i + 1] + ["--help"]))')
                runner_lines.append('PY')
                runner_lines.append(')"')
                runner_lines.append('  ODYSSEUS_VLLM_SUPPORTS_SWAP=0')
                runner_lines.append('  if eval "$ODYSSEUS_VLLM_HELP_CMD" 2>&1 | grep -q -- "--swap-space"; then ODYSSEUS_VLLM_SUPPORTS_SWAP=1; fi')
                runner_lines.append('fi')
                runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" = "1" ] && ! printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
                runner_lines.append('  echo "[odysseus] Setting vLLM --swap-space 0 so the runtime does not reserve CPU swap per GPU."')
                runner_lines.append('  ODYSSEUS_SERVE_CMD="${ODYSSEUS_SERVE_CMD} --swap-space 0"')
                runner_lines.append('fi')
                runner_lines.append('if [ -z "$ODYSSEUS_PREFLIGHT_EXIT" ] && [ "${ODYSSEUS_VLLM_SUPPORTS_SWAP:-0}" != "1" ]; then')
                runner_lines.append('  if printf "%s" "$ODYSSEUS_SERVE_CMD" | grep -q -- "--swap-space"; then')
                runner_lines.append('    echo "[odysseus] vLLM serve does not expose --swap-space; removing the flag and patching the runtime default to 0."')
                runner_lines.append('    ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
                runner_lines.append('import shlex, sys')
                runner_lines.append('parts = shlex.split(sys.argv[1])')
                runner_lines.append('out = []')
                runner_lines.append('skip = False')
                runner_lines.append('for part in parts:')
                runner_lines.append('    if skip:')
                runner_lines.append('        skip = False')
                runner_lines.append('        continue')
                runner_lines.append('    if part == "--swap-space":')
                runner_lines.append('        skip = True')
                runner_lines.append('        continue')
                runner_lines.append('    if part.startswith("--swap-space="):')
                runner_lines.append('        continue')
                runner_lines.append('    out.append(part)')
                runner_lines.append('print(shlex.join(out))')
                runner_lines.append('PY')
                runner_lines.append(')"')
                runner_lines.append('  fi')
                runner_lines.append('  ODYSSEUS_SERVE_CMD="$(python3 - "$ODYSSEUS_SERVE_CMD" <<\'PY\'')
                runner_lines.append('import shlex, sys')
                runner_lines.append('parts = shlex.split(sys.argv[1])')
                runner_lines.append('patch = r"""import inspect, sys')
                runner_lines.append('from vllm.engine.arg_utils import EngineArgs, AsyncEngineArgs')
                runner_lines.append('def _odysseus_swap0(cls):')
                runner_lines.append('    params = list(inspect.signature(cls).parameters)')
                runner_lines.append('    if "swap_space" not in params:')
                runner_lines.append('        return')
                runner_lines.append('    idx = params.index("swap_space")')
                runner_lines.append('    defaults = list(cls.__init__.__defaults__ or ())')
                runner_lines.append('    if idx < len(defaults):')
                runner_lines.append('        defaults[idx] = 0')
                runner_lines.append('        cls.__init__.__defaults__ = tuple(defaults)')
                runner_lines.append('    fields = getattr(cls, "__dataclass_fields__", {})')
                runner_lines.append('    if "swap_space" in fields:')
                runner_lines.append('        fields["swap_space"].default = 0')
                runner_lines.append('_odysseus_swap0(EngineArgs)')
                runner_lines.append('_odysseus_swap0(AsyncEngineArgs)')
                runner_lines.append('try:')
                runner_lines.append('    from vllm.config import CacheConfig')
                runner_lines.append('    CacheConfig.swap_space = 0')
                runner_lines.append('except Exception:')
                runner_lines.append('    pass')
                runner_lines.append('_orig_create_engine_config = EngineArgs.create_engine_config')
                runner_lines.append('def _odysseus_create_engine_config(self, *args, **kwargs):')
                runner_lines.append('    self.swap_space = 0')
                runner_lines.append('    return _orig_create_engine_config(self, *args, **kwargs)')
                runner_lines.append('EngineArgs.create_engine_config = _odysseus_create_engine_config')
                runner_lines.append('AsyncEngineArgs.create_engine_config = _odysseus_create_engine_config')
                runner_lines.append('from vllm.entrypoints.cli.main import main')
                runner_lines.append('sys.exit(main())"""')
                runner_lines.append('try:')
                runner_lines.append('    serve_i = parts.index("serve")')
                runner_lines.append('except ValueError:')
                runner_lines.append('    print(shlex.join(parts))')
                runner_lines.append('else:')
                runner_lines.append('    exe_i = serve_i - 1')
                runner_lines.append('    exe = parts[exe_i] if exe_i >= 0 else "vllm"')
                runner_lines.append('    py = "python3"')
                runner_lines.append('    if exe.endswith("/bin/vllm"):')
                runner_lines.append('        py = exe[:-len("/bin/vllm")] + "/bin/python"')
                runner_lines.append('    parts[exe_i:serve_i] = [py, "-c", patch]')
                runner_lines.append('    print(shlex.join(parts))')
                runner_lines.append('PY')
                runner_lines.append(')"')
                runner_lines.append('  echo "[odysseus] Patched vLLM internal swap_space default to 0 for this runtime."')
                runner_lines.append('fi')
            elif "sglang.launch_server" in req.cmd:
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
                runner_lines.append('if ! command -v sglang &>/dev/null; then')
@@ -1789,9 +1530,6 @@ def setup_cookbook_routes() -> APIRouter:
                    runner_lines,
                    keep_shell_open=not local_windows,
                )
                if "vllm serve" in req.cmd:
                    runner_lines.append('eval "$ODYSSEUS_SERVE_CMD"')
                else:
                runner_lines.append(req.cmd)
                if local_windows:
                    # Detached background process — no interactive shell to keep open.
@@ -2096,25 +1834,6 @@ def setup_cookbook_routes() -> APIRouter:
        out, err = await _run_gpu_shell("ls -1 /sys/class/drm 2>/dev/null", host, ssh_port, timeout=4)
        if err is not None or not out:
            return []
        # Pick the runtime label up-front so each GPU dict gets the
        # right `backend`. AMD silicon can be driven by ROCm/HIP (native)
        # OR Vulkan (mesa RADV). Reporting "rocm" on a host where no
        # ROCm toolchain is installed misleads the frontend env-var
        # prefix logic — it would emit `HIP_VISIBLE_DEVICES=` for a
        # Vulkan-only stack, which is a silent no-op at best.
        rt_out, _ = await _run_gpu_shell(
            'command -v rocminfo >/dev/null 2>&1 && echo rocm '
            '|| (command -v hipconfig >/dev/null 2>&1 && echo rocm) '
            '|| (command -v vulkaninfo >/dev/null 2>&1 && echo vulkan) '
            '|| echo unknown',
            host, ssh_port, timeout=4,
        )
        _amd_runtime = (rt_out or "").strip().splitlines()[-1:][0].strip() if rt_out else "rocm"
        if _amd_runtime not in ("rocm", "vulkan"):
            # Default to rocm so existing ROCm-installed hosts keep
            # working; "unknown" only happens when neither toolchain is
            # detected (e.g. minimal sysfs read on a fresh box).
            _amd_runtime = "rocm"
        gpus = []
        for entry in out.split():
            if not entry.startswith("card") or "-" in entry:
@@ -2158,7 +1877,7 @@ def setup_cookbook_routes() -> APIRouter:
                "free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb,
                "gtt_used_mb": gtt_used_mb,
                "util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85),
-                "processes": [], "backend": _amd_runtime, "source": "amd-sysfs",
+                "processes": [], "backend": "rocm", "source": "amd-sysfs",
                "unified_memory": unified,
            })
        if gpus:
@@ -2299,15 +2018,10 @@ def setup_cookbook_routes() -> APIRouter:
        amd_gpus = await _probe_amd_sysfs(host, ssh_port)
        if amd_gpus:
            # The per-GPU dict already carries the runtime label picked by
            # _probe_amd_sysfs (rocm vs vulkan); mirror that into the
            # wrapper so the frontend can read `data.backend` directly
            # without scanning the list.
            _amd_wrap_backend = str(amd_gpus[0].get("backend") or "rocm")
            return {
                "ok": True,
                "gpus": amd_gpus,
-                "backend": _amd_wrap_backend,
+                "backend": "rocm",
                "source": "amd-sysfs",
                "fallback_from": "nvidia-smi",
                "nvidia_error": nvidia_error,
@@ -2447,17 +2161,6 @@ def setup_cookbook_routes() -> APIRouter:
            disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
            incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
            incoming_removed = data.get("removedTasks") if isinstance(data.get("removedTasks"), dict) else {}
            disk_removed = on_disk.get("removedTasks") if isinstance(on_disk, dict) and isinstance(on_disk.get("removedTasks"), dict) else {}
            removed_tasks = {**disk_removed, **incoming_removed}
            data["removedTasks"] = removed_tasks
            removed_ids = set(removed_tasks.keys())
            if removed_ids:
                incoming_tasks = [
                    t for t in incoming_tasks
                    if not (isinstance(t, dict) and t.get("sessionId") in removed_ids)
                ]
                data["tasks"] = incoming_tasks
            # Anti-poisoning guard: a stale browser tab can keep POSTing a
            # download task as status='done' from before the strict-finish
            # fix landed, undoing any server-side correction. For each
@@ -2495,8 +2198,6 @@ def setup_cookbook_routes() -> APIRouter:
                sid = t.get("sessionId")
                if not sid or sid in incoming_ids:
                    continue  # client's version wins
                if sid in removed_ids:
                    continue  # intentional cross-device clear/remove
                ts = t.get("ts") or 0
                if isinstance(ts, (int, float)) and (now_ms - ts) <= RACE_WINDOW_MS:
                    preserved.append(t)
@@ -2603,14 +2304,16 @@ def setup_cookbook_routes() -> APIRouter:
            # Add 30% headroom for KV cache, activations, etc.
            needed_vram = (est_vram * 1.3) if est_vram else None
-            if vram_gb > 0:
+            if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb:
                if needed_vram is None:
                    # The "trending models that fit" list must be conservative:
                    # if we cannot estimate size from the repo id/tags, do not
                    # present it as runnable on this hardware.
                    continue
                if needed_vram > vram_gb:
                continue
            # Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no
            # "NB" in the repo id, so the regex above can't extract their
            # param count. Previously we dropped them entirely, which made
            # brand-new flagship releases silently vanish from this list even
            # on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already
            # filtered by _is_excluded(), so what falls through here is
            # overwhelmingly full models — keep them, just without a size
            # badge (the frontend handles needed_vram_gb=null gracefully).
            out.append({
                "repo_id": repo_id,
@@ -2807,33 +2510,6 @@ def setup_cookbook_routes() -> APIRouter:
            except Exception as e:
                logger.warning(f"orphan sweep: state write failed: {e}")
    @router.get("/api/cookbook/hf-gguf-files")
    async def hf_gguf_files(repo_id: str, owner: str = Depends(require_user)):
        """List GGUF files in a HuggingFace repo for the direct-download picker.

        Args:
            repo_id: Repository id; run through ``_validate_repo_id`` before it
                is interpolated into the HF API URL.
            owner: Resolved by the ``require_user`` dependency; not otherwise
                read in this body.

        Returns:
            ``{"ok": True, "repo_id": ..., "files": [...]}`` on success, or
            ``{"ok": False, "files": [], "error": ...}`` when the request
            fails or the HF API answers with a non-200 status.
        """
        import httpx
        repo_id = _validate_repo_id(repo_id)
        url = f"https://huggingface.co/api/models/{repo_id}"
        try:
            headers = {}
            token = _load_stored_hf_token()
            if token:
                headers["Authorization"] = f"Bearer {token}"
            async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
                resp = await client.get(url, headers=headers)
                if resp.status_code != 200:
                    return {"ok": False, "files": [], "error": f"HF API HTTP {resp.status_code}"}
                data = resp.json()
        except Exception:
            # `repo_id` is the only repository name in scope; logging any
            # other name here would raise NameError and mask the real failure.
            logger.exception("HF GGUF file scan failed for %s", repo_id)
            return {"ok": False, "files": [], "error": "HF API request failed"}
        # Be tolerant of unexpected payload shapes: a non-dict body, a missing
        # or null "siblings" key, or non-dict entries yield an empty listing
        # instead of an unhandled exception.
        siblings = data.get("siblings") if isinstance(data, dict) else None
        files = []
        for entry in siblings if isinstance(siblings, list) else []:
            if not isinstance(entry, dict):
                continue
            name = str(entry.get("rfilename") or "")
            if name.lower().endswith(".gguf"):
                files.append(name)
        return {"ok": True, "repo_id": repo_id, "files": files}
    # In-memory cache for the Ollama library scrape. ollama.com is a public
    # site, but it doesn't expose a stable JSON listing — we fetch the HTML
    # search page and regex out the model cards. Cached for 1 h so a busy
@@ -102,11 +102,8 @@ def _owner_session_filter(q, user):
    The owner backfill runs in init_db before the app serves requests, so
    by the time this filter is live there are no NULL-owner rows to leak;
-    we therefore match the owner strictly for authenticated callers."""
+    we therefore match the owner strictly."""
-    if not user:
+    if user is None:
        from src.auth_helpers import _auth_disabled
        if user == "" or _auth_disabled():
            return q
        return q.filter(False)
    return q.filter(Document.owner == user)
@@ -503,8 +503,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        user = get_current_user(request)
        try:
            data = await request.json()
-        except Exception as e:
+        except Exception:
            logger.warning("Failed to parse export request body, defaulting to empty", exc_info=e)
            data = {}
        ids = data.get("ids") or []
        if not ids:
@@ -646,8 +645,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                    try:
                        from src.agent_tools.document_tools import clear_active_document
                        clear_active_document(doc_id)
-                    except Exception as e:
+                    except Exception:
-                        logger.warning("Failed to clear active document %r on detach", doc_id, exc_info=e)
+                        pass
            db.commit()
            db.refresh(doc)
            return _doc_to_dict(doc)
@@ -1332,12 +1331,6 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            if not pdf_path:
                raise HTTPException(404, f"Source PDF {upload_id} not found")
            # Fail fast with a clear 503 if the optional PyMuPDF dependency
            # is missing — fill_fields/stamp_annotations will otherwise
            # raise RuntimeError deep inside and bubble out as a 500.
            # Mirrors the convention in _load_pdf_viewer_fitz above.
            _load_pdf_viewer_fitz()
            values = parse_markdown_to_values(doc.current_content or "")
            out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
            _to_unlink.append(out_path)
@@ -1233,30 +1233,22 @@ def _list_attachments_from_msg(msg):
        return attachments
    idx = 0
    for part in msg.walk():
        if part.is_multipart():
            continue
        cd = str(part.get("Content-Disposition", ""))
        ct = part.get_content_type()
        is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
        if part.is_multipart() and not is_attached_email:
            continue
        # Skip text/html body parts (only consider real attachments)
        if ct in ("text/plain", "text/html") and "attachment" not in cd:
            continue
        filename = part.get_filename()
        if filename:
            filename = _decode_header(filename)
            if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
                filename = f"{filename}.eml"
        else:
            # Inline images, etc. - generate a name
-            ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
+            ext = ct.split("/")[-1] if "/" in ct else "bin"
            filename = f"attachment_{idx}.{ext}"
        payload = part.get_payload(decode=True)
-        if payload is None and ct == "message/rfc822":
+        size = len(payload) if payload else 0
            try:
                payload = part.as_bytes()
            except Exception:
                payload = b""
        size = len(payload) if payload is not None else 0
        attachments.append({
            "index": idx,
            "filename": filename,
@@ -1268,58 +1260,29 @@ def _list_attachments_from_msg(msg):
    return attachments
 def _is_likely_signature_image_attachment(att: dict) -> bool:
    """Match the reader's inline signature/logo image filter."""
    filename = str((att or {}).get("filename") or "").lower()
    if not re.search(r"\.(png|jpe?g|gif|bmp|svg|webp)$", filename):
        return False
    size = int((att or {}).get("size") or 0)
    if re.search(r"^image\d{3,}\.(png|jpe?g|gif)$", filename):
        return True
    if re.search(r"^(signature|logo|sig|footer|banner)[-_\d]*\.(png|jpe?g|gif|svg)$", filename):
        return True
    return 0 < size < 30 * 1024
 def _has_visible_attachments(msg) -> bool:
    """Report whether ``msg`` has at least one attachment that is not a likely signature image."""
    for attachment in _list_attachments_from_msg(msg):
        if _is_likely_signature_image_attachment(attachment):
            continue
        # First attachment that passes the signature filter settles it.
        return True
    return False
 def _extract_attachment_to_disk(msg, index, target_dir):
    """Extract a specific attachment to disk and return the file path."""
    if not msg.is_multipart():
        return None
    idx = 0
    for part in msg.walk():
        if part.is_multipart():
            continue
        cd = str(part.get("Content-Disposition", ""))
        ct = part.get_content_type()
        is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
        if part.is_multipart() and not is_attached_email:
            continue
        if ct in ("text/plain", "text/html") and "attachment" not in cd:
            continue
        if idx == index:
            filename = part.get_filename()
            if filename:
                filename = _decode_header(filename)
                if ct == "message/rfc822" and not re.search(r"\.[A-Za-z0-9]{1,8}$", filename):
                    filename = f"{filename}.eml"
            else:
-                ext = "eml" if ct == "message/rfc822" else (ct.split("/")[-1] if "/" in ct else "bin")
+                ext = ct.split("/")[-1] if "/" in ct else "bin"
                filename = f"attachment_{idx}.{ext}"
            # Sanitize
            safe_name = re.sub(r"[^\w\s\-.]", "_", filename).strip()
            payload = part.get_payload(decode=True)
-            if payload is None and ct == "message/rfc822":
+            if not payload:
                try:
                    payload = part.as_bytes()
                except Exception:
                    payload = b""
            if payload is None:
                return None
            target_dir.mkdir(parents=True, exist_ok=True)
            filepath = target_dir / safe_name
@@ -47,7 +47,7 @@ from routes.email_helpers import (
    _IMAP_TIMEOUT_SECONDS, _open_imap_connection,
    make_oauth_state, verify_oauth_state,
    _imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
-    _extract_attachment_text, _list_attachments_from_msg, _has_visible_attachments, _is_likely_signature_image_attachment,
+    _extract_attachment_text, _list_attachments_from_msg,
    _extract_attachment_to_disk, _extract_html, _extract_text,
    _fetch_sender_thread_context, _pre_retrieve_context,
    _EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
@@ -61,7 +61,6 @@ from routes.email_pollers import _start_poller
 logger = logging.getLogger(__name__)
 ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
 EMAIL_READ_ATTACHMENT_VERSION = 2
 def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -80,16 +79,15 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st
                        cfg.get("smtp_user") or "",
                        cfg.get("from_address") or "",
                    ])
-                except Exception as _e:
+                except Exception:
                    logger.warning("Failed to resolve email account alias", exc_info=_e)
                    resolved_account_id = None
            row = db.get(_EA, resolved_account_id) if resolved_account_id else None
            if row:
                aliases.extend([row.owner or "", row.imap_user or "", row.from_address or ""])
        finally:
            db.close()
-    except Exception as _e:
+    except Exception:
-        logger.warning("Failed to load email aliases", exc_info=_e)
+        pass
    out = []
    for a in aliases:
        a = (a or "").strip()
@@ -249,21 +247,6 @@ def _imap_uid_fetch(conn, uid_set: str | bytes, query: str):
    return conn.uid("FETCH", _uid_bytes(uid_set), query)
 def _imap_search_quote(value: str) -> str:
    return '"' + str(value or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
 def _message_id_chain(*values: str) -> list[str]:
    seen = set()
    out = []
    for value in values:
        for mid in re.findall(r"<[^>]+>", value or ""):
            if mid not in seen:
                seen.add(mid)
                out.append(mid)
    return out
 def _uid_from_fetch_meta(meta_b: bytes) -> str:
    m = re.search(rb"\bUID\s+(\d+)\b", meta_b)
    return m.group(1).decode() if m else ""
@@ -382,21 +365,6 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N
        msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128]
 def _normalize_addr_field(field: str) -> str:
    """Strip the malformed-but-common trailing/leading commas and stray
    whitespace from a To/Cc/Bcc string before it lands in the MIME header
    or the SMTP envelope. Users often paste a single address with a
    trailing comma (e.g. `felix@pewdiepie.com,`) and most MTAs reject the
    resulting `To: felix@pewdiepie.com,` line as a syntax error. Collapse
    any run of separator junk between addresses too."""
    if not field:
        return field
    # Split on commas, drop empty tokens, rejoin with a single ', '.
    parts = [p.strip() for p in field.split(",")]
    parts = [p for p in parts if p]
    return ", ".join(parts)
 def _envelope_recipients(*fields: str) -> list:
    """Extract bare SMTP envelope addresses from one or more To/Cc/Bcc header
    strings. A naive `field.split(",")` corrupts display names that contain a
@@ -1025,65 +993,6 @@ def setup_email_routes():
                except Exception:
                    pass
    def _related_thread_attachments_sync(
        folder: str,
        account_id: str | None,
        owner: str,
        current_uid: str,
        current_message_id: str,
        in_reply_to: str,
        references: str,
        limit: int = 12,
    ) -> list[dict]:
        """Return visible attachments from referenced messages in this folder.

        Message-IDs are taken from ``references`` and ``in_reply_to`` (the
        current message's own id is excluded). Each id is looked up in
        ``folder`` via an IMAP header search, and the attachments of the
        matching messages are collected, skipping likely signature images.

        Args:
            folder: Mailbox to search; opened read-only.
            account_id: Account passed through to ``_imap``.
            owner: Owner passed through to ``_imap``.
            current_uid: UID of the message being read; never used as a source.
            current_message_id: Message-ID of the message being read.
            in_reply_to: Raw In-Reply-To header value.
            references: Raw References header value.
            limit: Maximum number of attachment dicts to return.

        Returns:
            At most ``limit`` attachment dicts, each extended with ``source_*``
            keys describing the message it came from. Any exception during the
            lookup is logged at debug level and whatever was collected so far
            is returned.
        """
        wanted_ids = _message_id_chain(references, in_reply_to)
        current_mid = (current_message_id or "").strip()
        wanted_ids = [mid for mid in wanted_ids if mid and mid != current_mid]
        if not wanted_ids:
            return []
        related: list[dict] = []
        try:
            with _imap(account_id, owner=owner) as conn:
                conn.select(_q(folder), readonly=True)
                # Search newest referenced messages first; cap work so opening
                # a long thread stays bounded (last 10 ids, last 3 UIDs each).
                for mid in reversed(wanted_ids[-10:]):
                    if len(related) >= limit:
                        break
                    status, data = _imap_uid_search(conn, f'(HEADER Message-ID {_imap_search_quote(mid)})')
                    if status != "OK" or not data or not data[0]:
                        continue
                    for uid_b in reversed(data[0].split()[-3:]):
                        # Re-check the cap per UID: the break in the attachment
                        # loop below only leaves that innermost loop, so without
                        # this guard further messages would be fetched and the
                        # result could grow past `limit`.
                        if len(related) >= limit:
                            break
                        source_uid = uid_b.decode(errors="ignore")
                        if not source_uid or source_uid == str(current_uid):
                            continue
                        st2, msg_data = _imap_uid_fetch(conn, source_uid, "(BODY.PEEK[])")
                        if st2 != "OK" or not msg_data or not isinstance(msg_data[0], tuple):
                            continue
                        msg = email_mod.message_from_bytes(msg_data[0][1])
                        source_from = _decode_header(msg.get("From", ""))
                        source_subject = _decode_header(msg.get("Subject", ""))
                        source_date = msg.get("Date", "")
                        for att in _list_attachments_from_msg(msg):
                            if _is_likely_signature_image_attachment(att):
                                continue
                            enriched = dict(att)
                            enriched.update({
                                "source_uid": source_uid,
                                "source_folder": folder,
                                "source_message_id": (msg.get("Message-ID") or "").strip(),
                                "source_from": source_from,
                                "source_subject": source_subject,
                                "source_date": source_date,
                            })
                            related.append(enriched)
                            if len(related) >= limit:
                                break
        except Exception as e:
            # Best-effort lookup: a failure here must not break reading the
            # message itself, so it is only logged.
            logger.debug(f"related thread attachment lookup failed uid={current_uid}: {e}")
        return related
    @router.get("/list")
    async def list_emails(
        folder: str = Query("INBOX"),
@@ -1354,17 +1263,6 @@ def setup_email_routes():
            sender_name, sender_addr = email.utils.parseaddr(sender)
            parsed_date = email.utils.parsedate_to_datetime(date_str) if date_str else None
            attachments = _list_attachments_from_msg(msg)
            related_attachments = []
            if not _has_visible_attachments(msg):
                related_attachments = _related_thread_attachments_sync(
                    folder,
                    account_id,
                    owner,
                    uid,
                    message_id,
                    in_reply_to,
                    references,
                )
            if mark_seen:
                # Set \Seen in a separate readwrite session so concurrent reads
@@ -1473,8 +1371,6 @@ def setup_email_routes():
                "body": body,
                "body_html": body_html,
                "attachments": attachments,
                "related_attachments": related_attachments,
                "attachment_version": EMAIL_READ_ATTACHMENT_VERSION,
                "cached_summary": cached_summary,
                "cached_ai_reply": cached_ai_reply,
                "boundaries": cached_boundaries,
@@ -1505,12 +1401,6 @@ def setup_email_routes():
        """Read email body. Cached for 30m, sync IMAP work runs in a thread."""
        ck = _read_cache_key(account_id, folder, uid, owner=owner)
        cached = _read_cache_get(ck)
        if cached is not None:
            # Older cached read responses lack the thread-attachment fallback.
            # Fetch once so replies that reference prior attachments can show
            # those files without waiting for cache expiry.
            if cached.get("attachment_version") != EMAIL_READ_ATTACHMENT_VERSION:
                cached = None
        if cached is not None:
            if mark_seen:
                try:
@@ -1645,12 +1535,6 @@ def setup_email_routes():
                return {"error": f"Attachment index {index} not found"}
            from pathlib import Path as _Path
            target_root = os.path.abspath(str(target_dir))
            filepath_str = os.path.abspath(str(filepath))
            if os.path.commonpath([target_root, filepath_str]) != target_root:
                logger.warning("Rejected attachment path outside extraction dir: %s", filepath)
                return {"error": "Invalid attachment path"}
            filepath = _Path(filepath_str)
            base = _Path(filepath).name
            if base.startswith("."):
                return {"error": "Invalid filename", "filename": base}
@@ -1705,65 +1589,6 @@ def setup_email_routes():
                    return None
            doc_session_id = _resolve_doc_session()
            def _create_markdown_doc(content: str, summary: str):
                """Persist ``content`` as a new active markdown document with a first version.

                Creates a ``Document`` (language ``markdown``, version count 1)
                and its ``DocumentVersion`` number 1 with source ``upload``,
                commits, tags the document with its source, and returns the new
                document id. The session id and title come from the enclosing
                scope.
                """
                from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
                doc_id = str(uuid.uuid4())
                ver_id = str(uuid.uuid4())
                session = _SL()
                try:
                    # NOTE(review): this deactivation filters only on is_active,
                    # not on session or owner — confirm that is intended.
                    currently_active = session.query(_Doc).filter(_Doc.is_active == True)
                    currently_active.update({"is_active": False})
                    new_doc = _Doc(
                        id=doc_id,
                        session_id=doc_session_id,
                        title=title,
                        language="markdown",
                        current_content=content,
                        version_count=1,
                        is_active=True,
                    )
                    session.add(new_doc)
                    first_version = _DV(
                        id=ver_id,
                        document_id=doc_id,
                        version_number=1,
                        content=content,
                        summary=summary,
                        source="upload",
                    )
                    session.add(first_version)
                    session.commit()
                finally:
                    session.close()
                _tag_doc_with_source(doc_id)
                return doc_id
            def _attached_email_markdown(raw_bytes: bytes):
                """Render the raw bytes of an attached email as a markdown summary.

                The output has a title line, the non-empty From/To/Cc/Date
                headers, a ``## Body`` section and, when the attached message
                has attachments of its own, a ``## Attachments`` list. Empty
                input or a parse failure yields a short placeholder document.
                """
                if not raw_bytes:
                    return f"# Attached email: {base}\n\n_(empty email attachment)_"
                try:
                    attached_msg = email_mod.message_from_bytes(raw_bytes)
                except Exception:
                    logger.exception("Failed to parse attached email %s", base)
                    return f"# Attached email: {base}\n\nCould not parse this email attachment."

                subject = _decode_header(attached_msg.get("Subject", "")) or base
                sender = _decode_header(attached_msg.get("From", ""))
                recipients = _decode_header(attached_msg.get("To", ""))
                copied = _decode_header(attached_msg.get("Cc", ""))
                sent_on = attached_msg.get("Date", "")
                body_text = _extract_text(attached_msg).strip()
                nested = _list_attachments_from_msg(attached_msg)

                md = [f"# Attached email: {subject}", ""]
                # Only headers that carry a value are rendered.
                header_lines = (
                    (sender, f"**From:** {sender}"),
                    (recipients, f"**To:** {recipients}"),
                    (copied, f"**Cc:** {copied}"),
                    (sent_on, f"**Date:** {sent_on}"),
                )
                md.extend(line for value, line in header_lines if value)
                md.extend(["", "## Body", "", body_text or "_(no readable body)_"])
                if nested:
                    md.extend(["", "## Attachments", ""])
                    for item in nested:
                        size = int(item.get("size") or 0)
                        if size < 1024:
                            size_label = f"{size} B"
                        else:
                            size_label = f"{round(size / 1024)} KB"
                        name = item.get("filename") or f"attachment_{item.get('index', '')}"
                        ctype = item.get("content_type") or "application/octet-stream"
                        md.append(f"- {name} ({ctype}, {size_label})")
                return "\n".join(md).strip()
            # ── PDF path (existing) ────────────────────────────────────
            if ext == ".pdf":
                import shutil as _shutil
@@ -1810,39 +1635,6 @@ def setup_email_routes():
                _tag_doc_with_source(doc_id)
                return {"doc_id": doc_id, "filename": filepath.name}
            # ── Attached email (.eml / message/rfc822) ────────────────
            if ext == ".eml":
                def _attachment_bytes_from_msg():
                    """Return the raw bytes of attachment number ``index`` in ``msg``.

                    ``msg`` and ``index`` come from the enclosing scope. Parts are
                    counted with the same skip rules used below (multipart
                    containers and non-attachment text bodies do not count).
                    Returns ``b""`` when the message is not multipart, the index
                    is not found, or the part has no payload.
                    """
                    if not msg.is_multipart():
                        return b""
                    # Running attachment counter; only advanced for parts that
                    # survive both skip checks.
                    idx = 0
                    for part in msg.walk():
                        cd = str(part.get("Content-Disposition", ""))
                        ct = part.get_content_type()
                        # A message/rfc822 part counts as an attachment when it is
                        # marked as one or carries a filename.
                        is_attached_email = ct == "message/rfc822" and ("attachment" in cd.lower() or part.get_filename())
                        if part.is_multipart() and not is_attached_email:
                            continue
                        # Plain/HTML bodies are not attachments unless flagged so.
                        if ct in ("text/plain", "text/html") and "attachment" not in cd:
                            continue
                        if idx == index:
                            payload = part.get_payload(decode=True)
                            if payload is None and ct == "message/rfc822":
                                # NOTE(review): as_bytes() serializes the part with
                                # its own MIME headers — confirm the caller's parser
                                # expects the wrapper rather than the inner message.
                                try:
                                    payload = part.as_bytes()
                                except Exception:
                                    payload = b""
                            return payload or b""
                        idx += 1
                    return b""
                try:
                    content = _attached_email_markdown(_attachment_bytes_from_msg())
                except Exception:
                    logger.exception("Failed to read email attachment %s", base)
                    return {"error": "Failed to read email attachment", "filename": base}
                doc_id = _create_markdown_doc(content, "Imported attached email")
                return {"doc_id": doc_id, "filename": filepath.name}
            # ── DOCX path: extract text → markdown document ───────────
            if ext == ".docx":
                try:
@@ -1880,7 +1672,25 @@ def setup_email_routes():
                    lines.append("")
                content = "\n".join(lines).strip() or f"_(empty {base})_"
-                doc_id = _create_markdown_doc(content, "Imported from DOCX")
+                from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
                doc_id = str(uuid.uuid4())
                ver_id = str(uuid.uuid4())
                _db = _SL()
                try:
                    _db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
                    _db.add(_Doc(
                        id=doc_id, session_id=doc_session_id, title=title,
                        language="markdown", current_content=content,
                        version_count=1, is_active=True,
                    ))
                    _db.add(_DV(
                        id=ver_id, document_id=doc_id, version_number=1,
                        content=content, summary="Imported from DOCX", source="upload",
                    ))
                    _db.commit()
                finally:
                    _db.close()
                _tag_doc_with_source(doc_id)
                return {"doc_id": doc_id, "filename": filepath.name}
            # ── Plain text / markdown ────────────────────────────────
@@ -1889,7 +1699,25 @@ def setup_email_routes():
                    content = filepath.read_text(encoding="utf-8", errors="replace")
                except Exception as e:
                    return {"error": f"Failed to read text file: {e}", "filename": base}
-                doc_id = _create_markdown_doc(content, "Imported from email attachment")
+                from src.database import SessionLocal as _SL, Document as _Doc, DocumentVersion as _DV
                doc_id = str(uuid.uuid4())
                ver_id = str(uuid.uuid4())
                _db = _SL()
                try:
                    _db.query(_Doc).filter(_Doc.is_active == True).update({"is_active": False})
                    _db.add(_Doc(
                        id=doc_id, session_id=doc_session_id, title=title,
                        language="markdown", current_content=content,
                        version_count=1, is_active=True,
                    ))
                    _db.add(_DV(
                        id=ver_id, document_id=doc_id, version_number=1,
                        content=content, summary="Imported from email attachment", source="upload",
                    ))
                    _db.commit()
                finally:
                    _db.close()
                _tag_doc_with_source(doc_id)
                return {"doc_id": doc_id, "filename": filepath.name}
            return {"error": f"Unsupported attachment type: {ext}", "filename": base}
@@ -2198,9 +2026,6 @@ def setup_email_routes():
            outer = MIMEMultipart("alternative")
            body_container = outer
        to = _normalize_addr_field(to or "")
        cc = _normalize_addr_field(cc or "")
        bcc = _normalize_addr_field(bcc or "")
        outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
        outer["To"] = to
        if cc:
@@ -2345,10 +2170,12 @@ def setup_email_routes():
        try:
            conn = sqlite3.connect(SCHEDULED_DB)
            conn.row_factory = sqlite3.Row
            # The MCP server can't easily set owner, so it stores '' — fall
            # back to those rows in addition to the caller's owner.
            rows = conn.execute(
                """SELECT id, to_addr, subject, body, created_at, account_id
                   FROM scheduled_emails
-                   WHERE status = 'agent_draft' AND owner = ?
+                   WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
                   ORDER BY created_at DESC""",
                (owner or "",),
            ).fetchall()
@@ -2369,7 +2196,7 @@ def setup_email_routes():
            cur = conn.execute(
                """UPDATE scheduled_emails
                   SET status = 'pending', send_at = ?
-                   WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
+                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (datetime.utcnow().isoformat(), sid, owner or ""),
            )
            conn.commit()
@@ -2390,7 +2217,7 @@ def setup_email_routes():
            conn = sqlite3.connect(SCHEDULED_DB)
            cur = conn.execute(
                """UPDATE scheduled_emails SET status = 'cancelled'
-                   WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
+                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (sid, owner or ""),
            )
            conn.commit()
@@ -2476,9 +2303,6 @@ def setup_email_routes():
            outer = MIMEMultipart("alternative")
            body_container = outer
        req.to = _normalize_addr_field(req.to or "")
        req.cc = _normalize_addr_field(req.cc or "")
        req.bcc = _normalize_addr_field(req.bcc or "")
        outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
        outer["To"] = req.to
        if req.cc:
@@ -9,7 +9,6 @@ from pathlib import Path
 from fastapi import APIRouter, HTTPException, Form, Depends
 from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR
 from core.middleware import require_admin
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
@@ -67,6 +67,14 @@ def _gallery_image_path(filename: str) -> Path:
        raise HTTPException(400, "Unsafe gallery filename")
    if safe_name != original:
        raise HTTPException(400, "Unsafe gallery filename")
    if not path.exists():
        cwd_root = (Path.cwd() / "data" / "generated_images").resolve()
        cwd_path = (cwd_root / safe_name).resolve()
        try:
            if os.path.commonpath([str(cwd_root), str(cwd_path)]) == str(cwd_root) and cwd_path.exists():
                return cwd_path
        except Exception:
            pass
    return path
@@ -1,13 +1,8 @@
 import json
 import os
 import re
 import shlex
 import subprocess
 from copy import deepcopy
 from fastapi import APIRouter, HTTPException
 from core.platform_compat import run_ssh_command
 from routes._validators import validate_remote_host, validate_ssh_port
@@ -112,73 +107,6 @@ def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_v
    return system
 def _run_model_probe(host: str, ssh_port: str, cmd: str) -> str:
    try:
        if host:
            r = run_ssh_command(
                host,
                ssh_port or None,
                cmd,
                timeout=15,
                connect_timeout=5,
                strict_host_key_checking=False,
                text=True,
            )
        else:
            r = subprocess.run(["bash", "-lc", cmd], capture_output=True, text=True, timeout=15)
        if r.returncode == 0:
            return (r.stdout or "").strip()
    except Exception:
        return ""
    return ""
 def _inspect_model_path(model_path: str, host: str = "", ssh_port: str = "") -> dict:
    """Read lightweight metadata from a local or SSH-visible HF model folder."""
    path = (model_path or "").strip()
    if not path or path.startswith(("http://", "https://")):
        return {}
    if not (path.startswith("/") or path.startswith("~")):
        return {}
    qpath = shlex.quote(path)
    qconfig = shlex.quote(os.path.join(path, "config.json"))
    out = {}
    exists = _run_model_probe(host, ssh_port, f"test -d {qpath} && printf found || printf missing")
    if exists != "found":
        target = host or "local container"
        out["model_probe_error"] = f"Model path is not visible on {target}: {path}"
        return out
    raw_config = _run_model_probe(host, ssh_port, f"test -f {qconfig} && sed -n '1,240p' {qconfig}")
    if raw_config:
        try:
            cfg = json.loads(raw_config)
        except Exception:
            cfg = {}
        for key in ("context_length", "max_position_embeddings", "n_ctx_train", "model_max_length", "max_seq_len"):
            value = cfg.get(key)
            if isinstance(value, (int, float)) and value > 0:
                out["model_ctx_max"] = int(value)
                break
    else:
        out["model_probe_error"] = f"config.json not found in model path: {path}"
    size_cmd = (
        f"find {qpath} -type f \\( -name '*.safetensors' -o -name '*.bin' -o -name '*.gguf' \\) "
        "-printf '%s\\n' 2>/dev/null | awk '{s+=$1} END {if (s>0) printf \"%.6f\", s/1073741824}'"
    )
    weights = _run_model_probe(host, ssh_port, size_cmd)
    try:
        weights_gb = float(weights)
    except Exception:
        weights_gb = 0.0
    if weights_gb > 0:
        out["model_weights_gb"] = round(weights_gb, 3)
    elif "model_probe_error" not in out:
        out["model_probe_error"] = f"No model weight files found in: {path}"
    return out
 def setup_hwfit_routes():
    router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
@@ -307,7 +235,7 @@ def setup_hwfit_routes():
        return {"system": system, "models": results}
    @router.get("/profiles")
-    def get_serve_profiles(model: str = "", model_path: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
+    def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
        """Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
        against the detected hardware on `host` (or local). Returns concrete
        flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
@@ -332,23 +260,8 @@ def setup_hwfit_routes():
            # "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
            s = (s or "").lower().strip()
            s = s.split("/")[-1]                     # drop org prefix
-            for suffix in ("-gguf", "_gguf", ".gguf", "gguf"):
+            s = re.sub(r"[-_.]?gguf$", "", s)        # drop trailing gguf marker
-                if s.endswith(suffix):
+            s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
                    s = s[: -len(suffix)]
                    break
            cut_at = None
            for idx, ch in enumerate(s):
                if ch not in "-_." or idx + 1 >= len(s):
                    continue
                suffix = s[idx + 1:]
                if (
                    suffix in {"fp8", "bf16", "f16"}
                    or suffix.startswith(("awq", "gptq", "iq"))
                    or (suffix.startswith("q") and len(suffix) > 1 and suffix[1].isdigit())
                ):
                    cut_at = idx
            if cut_at is not None:
                s = s[:cut_at]
            return s
        m = catalog.get(model)
@@ -359,16 +272,8 @@ def setup_hwfit_routes():
                if nn and (nn == want or want.endswith(nn) or nn.endswith(want)):
                    m = entry
                    break
        path_meta = _inspect_model_path(model_path or model, host=host, ssh_port=ssh_port)
        if m is None:
-            return {
+            return {"system": system, "profiles": [], "error": "model not in catalog"}
                "system": system,
                "profiles": [],
                "error": "model not in catalog",
                "model_ctx_max": int(path_meta.get("model_ctx_max") or 0),
                "model_weights_gb": float(path_meta.get("model_weights_gb") or 0),
                "model_probe_error": path_meta.get("model_probe_error") or "",
            }
        # Surface the model's trained context limit so the serve UI can clamp a
        # user-typed context down to it (asking for ctx > n_ctx_train overflows
        # and, with a quantized KV cache, can crash the GPU).
@@ -378,16 +283,6 @@ def setup_hwfit_routes():
            if isinstance(v, (int, float)) and v > 0:
                model_ctx_max = int(v)
                break
        path_ctx_max = int(path_meta.get("model_ctx_max") or 0)
        if path_ctx_max > 0:
            model_ctx_max = max(model_ctx_max, path_ctx_max)
        model_weights_gb = float(path_meta.get("model_weights_gb") or 0)
        if model_weights_gb <= 0:
            for k in ("min_vram_gb", "required_gb", "size_gb", "recommended_ram_gb", "min_ram_gb"):
                v = m.get(k)
                if isinstance(v, (int, float)) and v > 0:
                    model_weights_gb = float(v)
                    break
        return {
            "system": system,
            "profiles": compute_serve_profiles(
@@ -396,8 +291,6 @@ def setup_hwfit_routes():
                serve_quant=(serve_quant or None),
            ),
            "model_ctx_max": model_ctx_max,
            "model_weights_gb": model_weights_gb,
            "model_probe_error": path_meta.get("model_probe_error") or "",
        }
    @router.get("/image-models")
@@ -273,30 +273,65 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
    async def api_audit_memories(request: Request, session: str = Form(None)):
        """Deduplicate and consolidate memories via LLM.
-        Uses task/utility/default settings through the shared resolver, with
+        Uses the default model from settings, or falls back to a session's model.
        the active session as fallback when no task or utility model is set.
        Returns before and after memory counts.
        """
        from routes.model_routes import _load_settings, _normalize_base, build_chat_url
        from core.database import ModelEndpoint
        import json as _json
        endpoint_url = model = None
        headers = {}
        # Try utility model from settings first — memory audit is a background
        # task and should prefer the lighter utility model over the main chat model.
        from src.task_endpoint import resolve_task_endpoint
        user = _owner(request)
-        fallback_url = fallback_model = None
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
-        fallback_headers = None
+        if t_url and t_model:
-        if session:
+            endpoint_url, model, headers = t_url, t_model, t_headers
        else:
            # Fall back to default model if no task/utility model configured
            settings = _load_settings()
            ep_id = settings.get("default_endpoint_id", "")
            default_model = settings.get("default_model", "")
            if ep_id:
                db = SessionLocal()
                try:
                    ep = db.query(ModelEndpoint).filter(
                        ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
                    ).first()
                    if ep:
                        base = _normalize_base(ep.base_url)
                        endpoint_url = build_chat_url(base)
                        model = default_model
                        if not model and ep.models:
                            try:
                                models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
                                if models:
                                    model = models[0]
                            except Exception:
                                pass
                        if ep.api_key:
                            headers = {"Authorization": f"Bearer {ep.api_key}"}
                finally:
                    db.close()
            # Fall back to session model if no default configured
            if not endpoint_url and session:
                try:
                    sess = session_manager.get_session(session)
-                _assert_session_owner(sess, user)
+                    _assert_session_owner(sess, _owner(request))
-                fallback_url = sess.endpoint_url
+                    endpoint_url = sess.endpoint_url
-                fallback_model = sess.model
+                    model = sess.model
-                fallback_headers = sess.headers
+                    headers = sess.headers
                except KeyError:
                    pass
        endpoint_url, model, headers = resolve_task_endpoint(
            fallback_url, fallback_model, fallback_headers, owner=user
        )
        if not endpoint_url or not model:
            raise HTTPException(400, "No default model configured — set one in Settings")
        user = _owner(request)
        result = await audit_memories(
            memory_manager,
            memory_vector,
@@ -334,28 +369,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        model = None
        headers = {}
        user = _owner(request)
        if session:
            try:
                sess = session_manager.get_session(session)
-                _assert_session_owner(sess, user)
+                _assert_session_owner(sess, _owner(request))
            except KeyError:
                sess = None
            except HTTPException as exc:
                if exc.status_code != 404:
                    raise
                sess = None
            if sess is None:
                logger.warning("Session %s not found or inaccessible, falling back to utility endpoint", session)
                endpoint_url, model, headers = resolve_endpoint("utility", owner=user)
            else:
                endpoint_url, model, headers = resolve_task_endpoint(
-                    sess.endpoint_url, sess.model, sess.headers, owner=user
+                    sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
                )
            except KeyError:
                logger.warning("Session %s not found, falling back to utility endpoint", session)
                endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
        else:
-            endpoint_url, model, headers = resolve_task_endpoint(owner=user)
+            endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
        if not endpoint_url or not model:
            raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
@@ -5,7 +5,6 @@ import re
 import uuid
 import json
 import hashlib
 import ipaddress
 import socket
 import time as _time
 import logging
@@ -406,11 +405,8 @@ def _endpoint_refresh_timeout(ep: Any, category: str) -> float:
    except Exception:
        val = 0
    if val > 0:
-        return float(max(1, min(60, val)))
+        return float(max(1, min(30, val)))
-    # llama.cpp and other local OpenAI-compatible servers can block briefly
+    return 2.5 if category == "local" else 2.0
    # while warming/loading. A 2s local timeout makes working endpoints flicker
    # offline before /v1/models is ready.
    return 10.0 if category == "local" else 2.0
 def _manual_refresh_timeout(ep: Any, category: str, requested: Any = None) -> float:
@@ -477,7 +473,7 @@ def _explicit_model_list_timeout(base_url: str, endpoint_kind: str = "auto", req
    category = _classify_endpoint(base_url, kind)
    if kind in ("api", "proxy") or category == "api":
        return 30.0
-    return 15.0 if category == "local" else (3.0 if _is_ollama_base(base_url) else 2.0)
+    return 3.0 if _is_ollama_base(base_url) else 2.0
 def _cached_model_ids(ep: Any) -> List[str]:
@@ -566,8 +562,6 @@ def _safe_build_models_url(base_url: str) -> str:
    """Build a /models URL without letting optional provider imports break probes."""
    try:
        return build_models_url(base_url)
    except ValueError:
        raise
    except Exception as exc:
        logger.debug("Model URL detection failed for %s: %s", base_url, exc)
        return f"{(base_url or '').rstrip('/')}/models"
@@ -582,18 +576,6 @@ def _safe_build_headers(api_key: Optional[str], base_url: str) -> dict:
        return {"Authorization": f"Bearer {api_key}"} if api_key else {}
 def _redact_url_for_log(url: str) -> str:
    """Return a URL safe for logs by removing userinfo and query/fragment."""
    try:
        parsed = urlparse(url or "")
        host = parsed.hostname or ""
        if parsed.port:
            host = f"{host}:{parsed.port}"
        return urlunparse((parsed.scheme, host, parsed.path, "", "", ""))
    except Exception:
        return "<endpoint>"
 def _is_discovery_only_provider(provider: str) -> bool:
    return provider == "chatgpt-subscription"
@@ -651,7 +633,7 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
    try:
        t0 = _time.time()
-        r = httpx.post(target_url, headers=h, json=payload, timeout=timeout, verify=llm_verify())
+        r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
        latency = round((_time.time() - t0) * 1000)
        if r.is_success:
            return {"status": "ok", "latency_ms": latency}
@@ -677,20 +659,13 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
 # Hostnames / IP prefixes that indicate a local endpoint
 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
-_PRIVATE_NETWORKS = (
+_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
-    ipaddress.ip_network("10.0.0.0/8"),
+                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
-    ipaddress.ip_network("172.16.0.0/12"),
+                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-    ipaddress.ip_network("192.168.0.0/16"),
+                     "172.30.", "172.31.", "192.168.")
 )
 _TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
-def _local_ip_literal(host: str) -> bool:
+_TAILSCALE_RE = re.compile(r"^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.")
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        return False
    return any(ip in network for network in _PRIVATE_NETWORKS) or ip in _TAILSCALE_CGNAT
 def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
@@ -704,7 +679,9 @@ def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
        return "api"
    try:
        host = urlparse(base_url).hostname or ""
-        if host in _LOCAL_HOSTS or _local_ip_literal(host):
+        if host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES):
            return "local"
        if _TAILSCALE_RE.match(host):
            return "local"
    except Exception:
        pass
@@ -726,16 +703,6 @@ def _effective_endpoint_kind(ep: Any, base_url: str) -> str:
    return "auto"
 def _is_loading_model_response(resp: Any) -> bool:
    if getattr(resp, "status_code", None) != 503:
        return False
    try:
        body = resp.text or ""
    except Exception:
        body = ""
    return "loading model" in body.lower()
 def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> List[str]:
    """Probe a base URL's /models endpoint and return list of model IDs.
@@ -800,14 +767,11 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                        models.append(_e)
            return [m for m in models if _is_chat_model(m)]
    except httpx.HTTPStatusError as e:
        if e.response is not None and _is_loading_model_response(e.response):
            logger.info("Endpoint still loading model at %s", _redact_url_for_log(url))
            return []
        if api_key:
            status = e.response.status_code if e.response is not None else "unknown"
-            logger.warning("Failed to probe %s with API key: HTTP %s", _redact_url_for_log(url), status)
+            logger.warning(f"Failed to probe {url} with API key: HTTP {status}")
            return []
-        logger.warning("Failed to probe %s: %s", _redact_url_for_log(url), e)
+        logger.warning(f"Failed to probe {url}: {e}")
    except Exception as e:
        if api_key:
            logger.warning(f"Failed to probe {url} with API key: {e}")
@@ -852,15 +816,6 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
        or "ollama" in (parsed_base.hostname or "").lower()
    )
    def _is_loading_model_response(r) -> bool:
        if getattr(r, "status_code", None) != 503:
            return False
        try:
            body = r.text or ""
        except Exception:
            body = ""
        return "loading model" in body.lower()
    def _result_from_response(r) -> Dict[str, Any]:
        if 300 <= r.status_code < 400:
            loc = r.headers.get("location", "")
@@ -877,13 +832,6 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
                "status_code": r.status_code,
                "error": None,
            }
        if _is_loading_model_response(r):
            return {
                "reachable": True,
                "loading": True,
                "status_code": r.status_code,
                "error": "Loading model",
            }
        return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
    last_error: Optional[str] = None
@@ -1100,11 +1048,9 @@ def setup_model_routes(model_discovery):
        except Exception:
            return 0.0
-    def _failure_delay(fails: int, *, empty_local: bool = False) -> float:
+    def _failure_delay(fails: int) -> float:
        if fails <= 0:
            return 0.0
        if empty_local:
            return min(5.0 * (2 ** max(0, fails - 1)), 30.0)
        return min(_REFRESH_FAILURE_BASE * (2 ** max(0, fails - 1)), _REFRESH_FAILURE_MAX)
    def _should_refresh_endpoint(ep: Any, now: float, force: bool = False) -> tuple[bool, Dict[str, Any]]:
@@ -1135,12 +1081,7 @@ def setup_model_routes(model_discovery):
        fails = int(state.get("fail_count") or 0)
        if fails and not force:
            last_failure = float(state.get("last_failure") or 0.0)
-            empty_local = (
+            if now - last_failure < _failure_delay(fails):
                not cached
                and category == "local"
                and str(getattr(ep, "id", "") or "").startswith("local-")
            )
            if now - last_failure < _failure_delay(fails, empty_local=empty_local):
                return False, info
        if cached and not force:
            interval = _endpoint_refresh_interval(ep, category)
@@ -1455,7 +1396,7 @@ def setup_model_routes(model_discovery):
                t0 = _time.time()
                ping = _ping_endpoint(base, ep.api_key, timeout=1.5)
                entry["latency_ms"] = round((_time.time() - t0) * 1000)
-                entry["status"] = "loading" if ping.get("loading") else ("online" if ping.get("reachable") or cached_count else "offline")
+                entry["status"] = "online" if ping.get("reachable") or cached_count else "offline"
                entry["error"] = ping.get("error")
                entry["model_count"] = cached_count or (len(ANTHROPIC_MODELS) if provider == "anthropic" else 0)
            except Exception as e:
@@ -1629,37 +1570,9 @@ def setup_model_routes(model_discovery):
                # "everything's already cached" path because this branch only
                # runs for endpoints with an empty cached_models.
                if not all_models and not pinned and r.is_enabled:
-                    base_for_ping = _normalize_base(r.base_url)
+                    ping = _ping_endpoint(r.base_url, r.api_key, timeout=3.5)
                    kind_for_ping = _effective_endpoint_kind(r, base_for_ping)
                    ping_timeout = 10.0 if _classify_endpoint(base_for_ping, kind_for_ping) == "local" else 3.5
                    ping = _ping_endpoint(r.base_url, r.api_key, timeout=ping_timeout)
                    if ping.get("reachable"):
-                        status = "loading" if ping.get("loading") else "empty"
+                        status = "empty"
                        if ping.get("loading"):
                            base = _normalize_base(r.base_url)
                            kind = _effective_endpoint_kind(r, base)
                            results.append({
                                "id": r.id,
                                "name": r.name,
                                "base_url": r.base_url,
                                "has_key": bool(r.api_key),
                                "api_key_fingerprint": _api_key_fingerprint(r.api_key),
                                "is_enabled": r.is_enabled,
                                "models": visible,
                                "pinned_models": pinned,
                                "hidden_count": len(hidden),
                                "online": True,
                                "status": status,
                                "ping_error": (ping or {}).get("error") if ping else None,
                                "model_type": getattr(r, "model_type", None) or "llm",
                                "supports_tools": getattr(r, "supports_tools", None),
                                "endpoint_kind": kind,
                                "category": _classify_endpoint(base, kind),
                                "model_refresh_mode": _endpoint_refresh_mode(r, kind),
                                "model_refresh_interval": getattr(r, "model_refresh_interval", None),
                                "model_refresh_timeout": getattr(r, "model_refresh_timeout", None),
                            })
                            continue
                        # Best-effort: if the probe came back reachable, try
                        # to populate cached_models in the background so the
                        # NEXT picker load shows "online" instead of "empty".
@@ -1667,7 +1580,7 @@ def setup_model_routes(model_discovery):
                        # "empty" status, and the existing background refresh
                        # path will eventually fill it in too.
                        try:
-                            probed = _probe_endpoint(r.base_url, r.api_key, timeout=max(5, int(ping_timeout)))
+                            probed = _probe_endpoint(r.base_url, r.api_key, timeout=5)
                            if probed:
                                r.cached_models = json.dumps(probed)
                                db.commit()
@@ -1845,7 +1758,7 @@ def setup_model_routes(model_discovery):
        model_ids = _probe_endpoint(base_url, api_key.strip() or None, timeout=explicit_timeout) if should_probe else []
        ping = {"reachable": False, "error": None}
        if (should_probe or requested_kind in ("api", "proxy")) and not model_ids:
-            ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 10.0))
+            ping = _ping_endpoint(base_url, api_key.strip() or None, timeout=min(explicit_timeout, 2.0))
        if require_model_list and not model_ids:
            raise HTTPException(400, _model_endpoint_error_message(base_url, ping))
@@ -1912,7 +1825,7 @@ def setup_model_routes(model_discovery):
            "models": _merge_model_ids(model_ids, _pinned),
            "pinned_models": _pinned,
            "online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
-            "status": "online" if (model_ids or _pinned) else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
+            "status": "online" if (model_ids or _pinned) else ("empty" if ping.get("reachable") else "offline"),
            "ping_error": ping.get("error") if ping else None,
            "endpoint_kind": requested_kind,
            "category": _classify_endpoint(base_url, requested_kind),
@@ -1937,11 +1850,11 @@ def setup_model_routes(model_discovery):
        configured_timeout = _parse_positive_int(model_refresh_timeout, minimum=1, maximum=60)
        probe_timeout = _explicit_model_list_timeout(base_url, requested_kind, configured_timeout)
        models = _probe_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
-        ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 10.0))
+        ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=min(probe_timeout, 2.0))
        return {
            "base_url": base_url,
            "online": bool(models) or bool(ping.get("reachable")),
-            "status": "online" if models else ("loading" if ping.get("loading") else ("empty" if ping.get("reachable") else "offline")),
+            "status": "online" if models else ("empty" if ping.get("reachable") else "offline"),
            "ping_error": ping.get("error") if ping else None,
            "models": models,
            "count": len(models),
@@ -10,7 +10,6 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from core.database import SessionLocal, Note
 from core.middleware import INTERNAL_TOOL_USER
 from src.auth_helpers import require_user
 from src.constants import DATA_DIR
 from sqlalchemy.orm.attributes import flag_modified
@@ -583,7 +582,7 @@ def setup_note_routes(task_scheduler=None):
        return require_user(request) or None
    def _is_admin_or_single_user(request: Request, user: str | None) -> bool:
-        if user == INTERNAL_TOOL_USER:
+        if user == "internal-tool":
            return True
        if not user:
            # require_user() already admitted this request, which only happens
@@ -2,9 +2,8 @@
 """Routes for personal documents management."""
 import os
 import logging
 import shutil
 import uuid
-from typing import Any, Dict, List, Tuple
+from typing import List, Tuple
 from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
 from src.request_models import DirectoryRequest
 from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
@@ -19,14 +18,13 @@ UPLOADS_DIR = PERSONAL_UPLOADS_DIR
 logger = logging.getLogger(__name__)
-def _personal_upload_dir_for_owner(owner: str | None, *, create: bool = True) -> str:
+def _personal_upload_dir_for_owner(owner: str | None) -> str:
    """Return the per-owner upload directory used for direct RAG uploads."""
    owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
    upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
    base_abs = os.path.abspath(UPLOADS_DIR)
    if os.path.commonpath([upload_dir, base_abs]) != base_abs:
        raise ValueError("Unsafe upload owner path")
    if create:
    os.makedirs(upload_dir, exist_ok=True)
    return upload_dir
@@ -46,87 +44,6 @@ def _unique_personal_upload_path(upload_dir: str, original_name: str | None) ->
        raise ValueError("Unsafe upload filename")
    return file_path, filename, safe_name
 def _unique_existing_target(path: str) -> str:
    """Return a non-existing sibling path for rename collision handling."""
    if not os.path.exists(path):
        return path
    stem, ext = os.path.splitext(path)
    while True:
        candidate = f"{stem}-{uuid.uuid4().hex[:10]}{ext}"
        if not os.path.exists(candidate):
            return candidate
 def _remove_empty_tree(path: str) -> None:
    """Best-effort removal of empty directories under ``path``."""
    if not os.path.isdir(path):
        return
    for root, dirs, _files in os.walk(path, topdown=False):
        for dirname in dirs:
            candidate = os.path.join(root, dirname)
            try:
                os.rmdir(candidate)
            except OSError:
                pass
    try:
        os.rmdir(path)
    except OSError:
        pass
 def rename_personal_upload_owner(
    old_owner: str,
    new_owner: str,
    *,
    personal_docs_manager: Any = None,
    rag_manager: Any = None,
 ) -> Dict[str, Any]:
    """Relocate a renamed user's direct uploads and propagate the new owner.

    Every file under the old owner's upload directory is moved to the same
    relative location under the new owner's directory; name collisions are
    resolved by ``_unique_existing_target``. Afterwards the optional managers
    are notified through their ``rename_directory`` / ``rename_owner`` hooks,
    when those attributes exist and are callable.

    Args:
        old_owner: Owner name the uploads are currently stored under.
        new_owner: Owner name the uploads should be stored under.
        personal_docs_manager: Optional object; its ``rename_directory`` hook
            is called with the old dir, the new dir and the path map.
        rag_manager: Optional object; its ``rename_owner`` hook is called
            with both owner names, the path map and the directory prefix pair.

    Returns:
        A dict with ``old_dir``, ``new_dir``, ``moved_files`` (count),
        ``path_map`` (old absolute path -> new absolute path) and
        ``rag_result`` (whatever ``rename_owner`` returned, else ``None``).
    """
    src_dir = _personal_upload_dir_for_owner(old_owner, create=False)
    dst_dir = _personal_upload_dir_for_owner(new_owner, create=False)
    relocated: Dict[str, str] = {}

    if os.path.isdir(src_dir) and src_dir != dst_dir:
        os.makedirs(dst_dir, exist_ok=True)
        for walk_root, _subdirs, filenames in os.walk(src_dir):
            relative = os.path.relpath(walk_root, src_dir)
            if relative == ".":
                dst_root = dst_dir
            else:
                dst_root = os.path.join(dst_dir, relative)
            os.makedirs(dst_root, exist_ok=True)
            for fname in filenames:
                origin = os.path.abspath(os.path.join(walk_root, fname))
                landing = _unique_existing_target(
                    os.path.abspath(os.path.join(dst_root, fname))
                )
                shutil.move(origin, landing)
                relocated[origin] = landing
        # Only directories are left behind at this point; drop the empty ones.
        _remove_empty_tree(src_dir)

    if personal_docs_manager is not None:
        docs_hook = getattr(personal_docs_manager, "rename_directory", None)
        if callable(docs_hook):
            docs_hook(src_dir, dst_dir, path_map=relocated)

    rag_outcome = None
    if rag_manager is not None:
        rag_hook = getattr(rag_manager, "rename_owner", None)
        if callable(rag_hook):
            rag_outcome = rag_hook(
                old_owner,
                new_owner,
                path_map=relocated,
                path_prefixes=[(src_dir, dst_dir)],
            )

    summary: Dict[str, Any] = {}
    summary["old_dir"] = src_dir
    summary["new_dir"] = dst_dir
    # Every moved file has a distinct source path, so the map size is the count.
    summary["moved_files"] = len(relocated)
    summary["path_map"] = relocated
    summary["rag_result"] = rag_outcome
    return summary
 def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
    """
    Setup personal documents related routes.
@@ -358,13 +275,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
                except Exception as e:
                    logger.warning(f"RAG removal failed for {filepath}: {e}")
-            # Delete file from disk if it's in the caller's own uploads dir.
+            # Delete file from disk if it's in uploads dir
            # Scope to the per-owner subdir, not the shared uploads root, so one
            # admin can't delete another user's personal files by path.
            deleted_from_disk = False
            try:
                abs_target = os.path.realpath(filepath)
-                base_abs = os.path.realpath(_personal_upload_dir_for_owner(owner, create=False))
+                base_abs = os.path.realpath(UPLOADS_DIR)
                in_uploads = (
                    abs_target == base_abs
                    or os.path.commonpath([abs_target, base_abs]) == base_abs
@@ -12,10 +12,8 @@ from typing import Optional
 from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from core.middleware import INTERNAL_TOOL_USER
 from src.endpoint_resolver import resolve_endpoint
 from src.auth_helpers import _auth_disabled, get_current_user
 from core.auth import RESERVED_USERNAMES
 from src.constants import DEEP_RESEARCH_DIR
 _SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
@@ -387,9 +385,9 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Launch a research job from the dedicated panel."""
        from src.auth_helpers import require_privilege
        user = require_privilege(request, "can_use_research")
-        if user == INTERNAL_TOOL_USER:
+        if user == "internal-tool":
            tool_owner = (request.headers.get("X-Odysseus-Owner") or "").strip()
-            if tool_owner and tool_owner not in RESERVED_USERNAMES:
+            if tool_owner and tool_owner not in {"internal-tool", "api", "demo", "system"}:
                auth_mgr = getattr(request.app.state, "auth_manager", None)
                if auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
                    try:
@@ -11,7 +11,7 @@ from core.session_manager import SessionManager
 from core.models import ChatMessage
 from src.request_models import SessionResponse
 from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
-from src.auth_helpers import effective_user, _auth_disabled, owner_filter
+from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
 from src.session_actions import is_session_recently_active
@@ -328,7 +328,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        endpoint_id: str = Form(""),
    ):
        skip_val = str(skip_validation).lower() == "true"
-        user = effective_user(request)
+        user = get_current_user(request)
        endpoint_api_key = ""
        endpoint_base_url = ""
        _reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
@@ -477,7 +477,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                db.close()
        # Switch model/endpoint mid-session
        if model is not None and endpoint_url is not None:
-            user = effective_user(request)
+            user = get_current_user(request)
            _reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
            endpoint_api_key = ""
            endpoint_base_url = ""
@@ -1004,7 +1004,6 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        """
        from src.llm_core import llm_call
        user = effective_user(request)
        single_user_mode = not user and _auth_disabled()
        user_sessions = session_manager.get_sessions_for_user(user)
        # Delete empty and throwaway sessions before sorting
@@ -1023,12 +1022,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        }
        _THROWAWAY_MAX_MESSAGES = 4  # only delete if <= this many messages
        try:
-            rows_q = db.query(DbSession).filter(DbSession.archived == False)
+            rows = db.query(DbSession).filter(DbSession.archived == False, DbSession.owner == user).limit(2000).all()
            if user:
                rows_q = rows_q.filter(DbSession.owner == user)
            elif not single_user_mode:
                rows_q = rows_q.filter(DbSession.owner == user)
            rows = rows_q.limit(2000).all()
            folder_map = {r.id: r.folder for r in rows}
            # Precompute per-session message counts in TWO aggregate queries
            # instead of 1–3 queries PER session — with many chats the per-row
@@ -1248,12 +1242,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        db = SessionLocal()
        try:
            for sid, folder_name in assignments.items():
-                db_session_q = db.query(DbSession).filter(DbSession.id == sid)
+                db_session = db.query(DbSession).filter(DbSession.id == sid, DbSession.owner == user).first()
                if user:
                    db_session_q = db_session_q.filter(DbSession.owner == user)
                elif not single_user_mode:
                    db_session_q = db_session_q.filter(DbSession.owner == user)
                db_session = db_session_q.first()
                if db_session:
                    db_session.folder = folder_name
                    db_session.updated_at = datetime.utcnow()
@@ -15,7 +15,6 @@ from collections import namedtuple
 from pathlib import Path
 from typing import Dict, Any
 from core.platform_compat import IS_APPLE_SILICON, which_tool
 from core.middleware import INTERNAL_TOOL_USER
 from src.optional_deps import prepare_optional_dependency_import
 # POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
@@ -56,7 +55,7 @@ def _require_admin(request: Request):
    # In-process tool loopback. The AuthMiddleware already validated the
    # internal token + loopback client before setting this marker, so
    # honour it here as admin-equivalent.
-    if user == INTERNAL_TOOL_USER:
+    if user == "internal-tool":
        return
    if not user or user == "api":
        raise HTTPException(403, "Admin only")
@@ -331,9 +330,6 @@ def add_user_install_bins_to_path():
        candidates.append(os.path.join(site.USER_BASE, 'bin'))
    except Exception:
        pass
    candidates.append(os.path.expanduser('~/bin'))
    candidates.append(os.path.expanduser('~/llama.cpp/build/bin'))
    candidates.append(os.path.expanduser('~/llama.cpp/build-vulkan/bin'))
    candidates.append(os.path.expanduser('~/.local/bin'))
    parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
    changed = False
@@ -965,84 +961,12 @@ def setup_shell_routes() -> APIRouter:
        return StreamingResponse(generate(), media_type="text/event-stream")
    def _os_id_from_release(text: str) -> str:
        """Map /etc/os-release contents to a canonical family for our matrix."""
        if not text:
            return ""
        ids = []
        for line in text.splitlines():
            line = line.strip()
            if line.startswith("ID=") or line.startswith("ID_LIKE="):
                ids += line.split("=", 1)[1].strip().strip('"').split()
        ids = [i.lower() for i in ids]
        if any(x in ids for x in ("debian", "ubuntu", "linuxmint", "pop", "elementary")):
            return "debian"
        if any(x in ids for x in ("arch", "manjaro", "endeavouros", "cachyos", "garuda")):
            return "arch"
        if any(x in ids for x in ("fedora", "rhel", "centos", "rocky", "almalinux", "ol")):
            return "fedora"
        if "alpine" in ids:
            return "alpine"
        if any(x in ids for x in ("suse", "opensuse", "opensuse-leap", "opensuse-tumbleweed", "sles")):
            return "suse"
        return ""
    # Matrix lookup keyed on (os_family, backend) → (pkg_mgr_cmd_template, pkg_list_per_dep).
    # Each `system_prereqs` name resolves to a list of OS-specific package
    # names that get joined into the final `sudo apt install -y …` etc.
    # command. Backend-specific extras (CUDA toolkit, ROCm, Vulkan headers)
    # are added only when the detected backend needs them.
    _PKG_NAMES = {
        # canonical-name → {os_id: [actual_pkg_names_on_this_os]}
        "cmake":           {"debian": ["cmake"], "arch": ["cmake"], "fedora": ["cmake"], "alpine": ["cmake"], "suse": ["cmake"], "macos": ["cmake"]},
        "build-essential": {"debian": ["build-essential"], "arch": ["base-devel"], "fedora": ["gcc", "gcc-c++", "make"], "alpine": ["build-base"], "suse": ["gcc-c++", "make"], "macos": []},
        "g++":             {"debian": ["g++"], "arch": ["gcc"], "fedora": ["gcc-c++"], "alpine": ["g++"], "suse": ["gcc-c++"], "macos": []},
        "gcc":             {"debian": ["gcc"], "arch": ["gcc"], "fedora": ["gcc"], "alpine": ["gcc"], "suse": ["gcc"], "macos": []},
        "make":            {"debian": ["make"], "arch": ["make"], "fedora": ["make"], "alpine": ["make"], "suse": ["make"], "macos": []},
        "git":             {"debian": ["git"], "arch": ["git"], "fedora": ["git"], "alpine": ["git"], "suse": ["git"], "macos": ["git"]},
        "tmux":            {"debian": ["tmux"], "arch": ["tmux"], "fedora": ["tmux"], "alpine": ["tmux"], "suse": ["tmux"], "macos": ["tmux"]},
    }
    _BACKEND_EXTRAS = {
        "cuda":   {"debian": ["nvidia-cuda-toolkit"], "arch": ["cuda"], "fedora": ["cuda-toolkit"], "alpine": [], "suse": ["cuda"], "macos": []},
        "rocm":   {"debian": ["rocm-dev"], "arch": ["rocm-hip-sdk"], "fedora": ["rocm-devel"], "alpine": [], "suse": ["rocm-dev"], "macos": []},
        "vulkan": {"debian": ["libvulkan-dev", "vulkan-tools"], "arch": ["vulkan-headers", "vulkan-tools"], "fedora": ["vulkan-headers", "vulkan-tools"], "alpine": ["vulkan-loader-dev", "vulkan-tools"], "suse": ["vulkan-devel", "vulkan-tools"], "macos": []},
    }
    _PKG_MGR = {
        "debian": "sudo apt install -y {pkgs}",
        "arch":   "sudo pacman -S --needed {pkgs}",
        "fedora": "sudo dnf install -y {pkgs}",
        "alpine": "sudo apk add {pkgs}",
        "suse":   "sudo zypper install -n {pkgs}",
        "macos":  "brew install {pkgs}",
    }
    def _install_cmd_for_target(os_id: str, backend: str, missing: list[str]) -> str:
        """Build a single OS+backend-aware install command for the missing prereqs."""
        if not os_id or os_id not in _PKG_MGR:
            return ""
        pkgs: list[str] = []
        seen: set[str] = set()
        for m in missing:
            for p in _PKG_NAMES.get(m, {}).get(os_id, []):
                if p not in seen:
                    pkgs.append(p); seen.add(p)
        # Add backend-specific extras only when the build would actually
        # consume them (a CUDA toolkit isn't useful on a Vulkan box).
        backend = (backend or "").lower()
        for p in _BACKEND_EXTRAS.get(backend, {}).get(os_id, []):
            if p not in seen:
                pkgs.append(p); seen.add(p)
        if not pkgs:
            return ""
        return _PKG_MGR[os_id].format(pkgs=" ".join(pkgs))
    @router.get("/api/cookbook/packages")
    async def list_packages(
        request: Request,
        host: str | None = None,
        ssh_port: str | None = None,
        venv: str | None = None,
        backend: str | None = None,
    ):
        """Check which optional packages are installed.
@@ -1091,12 +1015,6 @@ def setup_shell_routes() -> APIRouter:
                "kind": "system",
                "install_hint": "Install Docker on the selected server and allow this user to run docker.",
            },
            # Note: cmake / gcc / git are not separate dependency rows —
            # they're declared as `system_prereqs` on llama_cpp (and any
            # other engine that compiles from source) so they appear as
            # an inline status note on that engine's row instead of
            # cluttering the panel with raw OS package names that aren't
            # meaningful product-level dependencies on their own.
            # ── LLM ── installs on GPU servers for model serving/downloading
            {
                "name": "hf_transfer",
@@ -1108,16 +1026,9 @@ def setup_shell_routes() -> APIRouter:
            {
                "name": "llama_cpp",
                "pip": "llama-cpp-python[server]",
-                "desc": "Great for single-GPU or CPU inference with GGUF models",
+                "desc": "Serve GGUF models via llama.cpp",
                "category": "LLM",
                "target": "remote",
                # Build-toolchain prereqs. Cookbook's launch bootstrap
                # compiles llama-server from source when no prebuilt
                # binary is present; without these the build aborts
                # with `cmake: command not found`. Surfaced inline on
                # this row so the user doesn't have to chase three
                # separate OS-package rows.
                "system_prereqs": ["cmake", "g++", "git"],
            },
            {
                "name": "sglang",
@@ -1129,7 +1040,7 @@ def setup_shell_routes() -> APIRouter:
            {
                "name": "vllm",
                "pip": "vllm",
-                "desc": "Great for high-throughput multi-GPU inference",
+                "desc": "High-throughput LLM serving engine",
                "category": "LLM",
                "target": "remote",
            },
@@ -1192,7 +1103,6 @@ def setup_shell_routes() -> APIRouter:
        # venv over SSH so a remote `pip install` actually reflects here.
        remote_status: dict = {}
        remote_details: dict = {}
        remote_probe_error = ""
        remote_names = [
            p["name"]
            for p in packages
@@ -1231,56 +1141,16 @@ def setup_shell_routes() -> APIRouter:
                        break
            except ValueError as e:
                raise HTTPException(400, str(e))
-            except Exception as e:
+            except Exception:
                remote_status = {}
-                remote_probe_error = f"SSH package probe failed: {str(e)[:160]}"
+        if host and remote_system_names:
            if "llama_cpp" in remote_names:
                try:
                    inner = (
                        'export PATH="$HOME/.local/bin:$HOME/bin:'
                        '$HOME/llama.cpp/build/bin:$HOME/llama.cpp/build-vulkan/bin:$PATH"; '
                        "command -v llama-server 2>/dev/null || true"
                    )
                    argv = _ssh_base_argv(host, ssh_port) + [inner]
                    proc = await asyncio.create_subprocess_exec(
                        *argv,
                        stdout=asyncio.subprocess.PIPE,
                        stderr=asyncio.subprocess.PIPE,
                    )
                    out, _err = await asyncio.wait_for(proc.communicate(), timeout=8)
                    llama_server_path = out.decode("utf-8", errors="replace").strip().splitlines()
                    llama_server_path = llama_server_path[-1].strip() if llama_server_path else ""
                    if llama_server_path:
                        remote_status["llama_cpp"] = True
                        probe = remote_details.setdefault("llama_cpp", {})
                        if isinstance(probe, dict):
                            probe.setdefault("binaries", {})["llama-server"] = llama_server_path
                except Exception as e:
                    if not remote_probe_error:
                        remote_probe_error = f"SSH llama-server probe failed: {str(e)[:160]}"
                    pass
        # Union of system_names + every package's system_prereqs. Probing
        # the prereqs alongside the main system deps in a single SSH call
        # avoids a second round-trip per Cookbook → Dependencies refresh.
        prereq_names: set[str] = set()
        for p in packages:
            for pr in p.get("system_prereqs") or []:
                prereq_names.add(str(pr))
        all_system_names = list(set(remote_system_names) | prereq_names)
        # Detect the target's OS family + read /etc/os-release in the same
        # SSH round-trip as the prereq probe — used downstream to render a
        # single OS-specific install command per row instead of dumping
        # every distro's syntax onto the user.
        target_os_id: str = ""
        if host and all_system_names:
            try:
                checks = []
-                for name in all_system_names:
+                for name in remote_system_names:
                    qn = shlex.quote(name)
                    checks.append(
                        f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi"
                    )
                checks.append("echo '---OSREL---'; cat /etc/os-release 2>/dev/null || true")
                inner = " ; ".join(checks)
                argv = _ssh_base_argv(host, ssh_port) + [inner]
                proc = await asyncio.create_subprocess_exec(
@@ -1290,44 +1160,19 @@ def setup_shell_routes() -> APIRouter:
                )
                out, _err = await asyncio.wait_for(proc.communicate(), timeout=12)
                txt = out.decode("utf-8", errors="replace").strip()
                _section, _osrel_lines = "probe", []
                for line in txt.splitlines():
                    if line.strip() == "---OSREL---":
                        _section = "osrel"; continue
                    if _section == "osrel":
                        _osrel_lines.append(line)
                        continue
                    name, sep, value = line.strip().partition("=")
-                    if sep and name in all_system_names:
+                    if sep and name in remote_system_names:
                        remote_status[name] = value == "1"
                target_os_id = _os_id_from_release("\n".join(_osrel_lines))
            except ValueError as e:
                raise HTTPException(400, str(e))
            except Exception as e:
                if not remote_probe_error:
                    remote_probe_error = f"SSH system probe failed: {str(e)[:160]}"
                pass
        elif not host:
            # Local target — probe in-process so the inline install command
            # still appears in the dep panel when the cookbook container
            # itself is the selected server.
            try:
                with open("/etc/os-release", encoding="utf-8") as f:
                    target_os_id = _os_id_from_release(f.read())
            except Exception:
-                target_os_id = ""
+                pass
            if sys.platform == "darwin":
                target_os_id = "macos"
        for pkg in packages:
            on_remote = bool(host and pkg.get("target") == "remote")
            probe = None
            if on_remote:
                if remote_probe_error and pkg["name"] not in remote_status:
                    pkg["installed"] = None
                    pkg["probe_error"] = remote_probe_error
                    pkg["status_note"] = remote_probe_error
                else:
                pkg["installed"] = bool(remote_status.get(pkg["name"], False))
                probe = remote_details.get(pkg["name"])
                if isinstance(probe, dict):
@@ -1384,104 +1229,6 @@ def setup_shell_routes() -> APIRouter:
                    # 500 the entire packages panel; report it as not usable.
                    pkg["installed"] = False
            # llama_cpp partial-state probe: when the package is installed
            # but the wheel was built CPU-only AND the target has NVIDIA
            # hardware, mark the row as partial (yellow/orange) with a
            # one-click upgrade to the CUDA wheel. Without this the row
            # reads "ready" green while inference runs at 3 tok/s on GPU
            # silicon — actively misleading.
            if pkg["name"] == "llama_cpp" and pkg.get("installed"):
                _native_llama_server = bool(
                    isinstance(probe, dict)
                    and isinstance(probe.get("binaries"), dict)
                    and probe["binaries"].get("llama-server")
                )
                _gpu_capable = False
                _has_nvidia_target = False
                if _native_llama_server:
                    # Native llama-server is the launcher path Cookbook now
                    # prefers. Do not mark this as a CPU-only Python wheel just
                    # because llama-cpp-python is absent from the selected venv.
                    _gpu_capable = True
                elif on_remote and host:
                    try:
                        # Activate the configured venv FIRST so the probe
                        # runs against the same python the launch script
                        # would activate. Without this prefix, bare
                        # `python3` was checked — which can disagree with
                        # the venv's wheel (e.g. user-site has CUDA wheel
                        # but venv has CPU-only), and the dep panel then
                        # showed "ready" green while every launch fell to
                        # CPU.
                        _vp = _venv_activate_prefix(venv)
                        probe = (
                            f'{_vp}python3 -c "import llama_cpp; import sys; '
                            'sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" '
                            '&& echo llama_cpp_gpu=1 || echo llama_cpp_gpu=0; '
                            'command -v nvidia-smi >/dev/null 2>&1 '
                            '&& nvidia-smi -L 2>/dev/null | grep -q "GPU " '
                            '&& echo nvidia=1 || echo nvidia=0'
                        )
                        argv = _ssh_base_argv(host, ssh_port) + [probe]
                        proc = await asyncio.create_subprocess_exec(
                            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
                        )
                        out, _ = await asyncio.wait_for(proc.communicate(), timeout=8)
                        txt = out.decode("utf-8", errors="replace")
                        if "llama_cpp_gpu=1" in txt:
                            _gpu_capable = True
                        if "nvidia=1" in txt:
                            _has_nvidia_target = True
                    except Exception:
                        pass
                else:
                    try:
                        import llama_cpp as _lcp  # type: ignore
                        _gpu_capable = bool(_lcp.llama_supports_gpu_offload())
                    except Exception:
                        _gpu_capable = False
                    _has_nvidia_target = shutil.which("nvidia-smi") is not None
                if (not _gpu_capable) and _has_nvidia_target:
                    pkg["partial"] = True
                    pkg["partial_reason"] = "Installed but CPU-only wheel — GPU detected on this target. Upgrade to a CUDA wheel for ~10× faster inference."
                    pkg["partial_action"] = "reinstall_llama_cpp_cuda"
            # Attach per-package system_prereqs status. We probed each
            # prereq name above; surface "Missing build deps: …" ONLY
            # when the package itself is not installed — if the package
            # works (e.g. llama-cpp-python already imports cleanly), the
            # build toolchain is irrelevant and surfacing it as a red
            # flag confuses users ("ready" + "missing" on the same row).
            _prereqs = list(pkg.get("system_prereqs") or [])
            if _prereqs:
                if on_remote:
                    _pr_present = {n: bool(remote_status.get(n)) for n in _prereqs}
                else:
                    _pr_present = {n: shutil.which(n) is not None for n in _prereqs}
                pkg["system_prereqs_status"] = _pr_present
                _missing = [n for n, ok in _pr_present.items() if not ok]
                # Suppress the "missing build deps" hint when the package
                # itself is installed — build deps are only relevant if
                # the user would need to recompile from source.
                if pkg.get("installed"):
                    _missing = []
                if _missing:
                    # Build a target-specific install command from the
                    # (os_family, backend) matrix when we know both. Fall
                    # back to the multi-distro hint only when the target's
                    # OS can't be classified (e.g. ssh probe failed).
                    _resolved_os = target_os_id or "debian"  # safest default
                    _cmd = _install_cmd_for_target(_resolved_os, backend or "", _missing)
                    if _cmd and target_os_id:
                        _hint = "Missing build deps for this target: " + ", ".join(_missing)
                        pkg["install_cmd_for_target"] = _cmd
                        pkg["install_cmd_os"] = target_os_id
                        pkg["install_cmd_backend"] = (backend or "").lower()
                    else:
                        _hint = "Missing build deps: " + ", ".join(_missing) + ". Install via apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git."
                    _existing_note = pkg.get("status_note") or ""
                    pkg["status_note"] = (_existing_note + " — " + _hint) if _existing_note else _hint
                    pkg["build_deps_missing"] = _missing
            if pkg.get("installed"):
                update_status = _package_pip_update_status(pkg, probe)
                pkg["pip_update_available"] = update_status.available
@@ -1541,102 +1288,6 @@ def setup_shell_routes() -> APIRouter:
            return {"ok": True, "output": stdout.decode()[-200:]}
        return {"ok": False, "error": stderr.decode()[-300:]}
    @router.post("/api/cookbook/install-system-deps")
    async def install_system_deps(request: Request):
        """Install OS-level system packages (cmake/build-essential/git/tmux)
        on a remote target or in the local container. Admin only.

        Request JSON fields: ``packages`` (canonical names; a single string
        is accepted too), optional ``remote_host`` and ``ssh_port``. With a
        host the script runs over SSH, otherwise through a local ``bash -lc``.

        Bounded by a per-package allowlist — anything outside the catalog
        is dropped so the route can't be coerced into installing arbitrary
        OS packages. Uses `sudo -n` (passwordless) so the call returns a
        clear "needs sudo password" error instead of hanging when interactive
        sudo is required. The sudo check is made only on the apt/pacman/dnf
        branches; the brew branch never invokes sudo.

        Returns:
            ``{"ok", "exit_code", "output", "error"}`` after the script ran,
            or ``{"ok": False, "error": ...}`` when nothing installable was
            requested or the install exceeded 180 seconds.

        Raises:
            HTTPException: 400 when ``_ssh_base_argv`` rejects the host/port.
        """
        _require_admin(request)
        body = await request.json()
        raw = body.get("packages") or []
        host = (body.get("remote_host") or "").strip()
        ssh_port = body.get("ssh_port")
        # Names users can request — must match canonical names used in the
        # deps catalog's `system_prereqs` field and on the System rows.
        ALLOWED = {"cmake", "build-essential", "g++", "gcc", "git", "tmux", "make"}
        # A bare string would otherwise be iterated character by character
        # and every character rejected by the allowlist.
        if isinstance(raw, str):
            raw = [raw]
        # Keep allowlisted names only, first occurrence wins.
        pkgs = list(dict.fromkeys(
            name for name in (str(p).strip() for p in raw) if name in ALLOWED
        ))
        if not pkgs:
            return {"ok": False, "error": "no installable packages requested (allowlist: " + ", ".join(sorted(ALLOWED)) + ")"}

        # Re-map to the right package name per OS. apt/dpkg use the names
        # as-is; pacman has base-devel for build-essential, etc.
        def _apt(names):
            return list(names)

        def _pacman(names):
            # Arch has no "g++" package; the gcc package provides g++.
            remap = {"build-essential": "base-devel", "g++": "gcc"}
            return list(dict.fromkeys(remap.get(n, n) for n in names))

        def _dnf(names):
            out = []
            for n in names:
                if n == "build-essential":
                    out += ["gcc", "gcc-c++", "make"]
                elif n == "g++":
                    out.append("gcc-c++")
                else:
                    out.append(n)
            return list(dict.fromkeys(out))

        def _brew(names):
            return [n for n in names if n not in ("build-essential", "g++", "gcc", "make")]

        apt_pkgs = " ".join(shlex.quote(p) for p in _apt(pkgs))
        pac_pkgs = " ".join(shlex.quote(p) for p in _pacman(pkgs))
        dnf_pkgs = " ".join(shlex.quote(p) for p in _dnf(pkgs))
        brew_pkgs = " ".join(shlex.quote(p) for p in _brew(pkgs))

        # Error messages go to stderr (>&2) so the route's error field
        # gets populated. Without the redirect, `echo "ERROR…"` on stdout
        # left stderr empty and the frontend toast fell through to a
        # bare "HTTP 200" instead of surfacing the real reason.
        # Non-interactive sudo (-n) only — if sudo asks for a password the
        # script reports it instead of hanging. `pkgs` is allowlisted, so
        # embedding it in the message cannot inject shell syntax.
        sudo_guard = (
            'if ! sudo -n true 2>/dev/null; then '
            '  echo "ERROR: passwordless sudo unavailable on this target. Run once: sudo apt install -y ' + " ".join(pkgs) + ' (or your distro equivalent: pacman -S, dnf install, brew install). After that, Cookbook can install the rest." >&2; exit 2; fi; '
        )
        # `brew install` with no formula is an error, so skip it when every
        # requested name was filtered out for Homebrew.
        if brew_pkgs:
            brew_step = f'brew install {brew_pkgs}'
        else:
            brew_step = 'echo "Nothing to install via brew for the requested packages."'
        # Single shell snippet that detects the package manager and runs the
        # matching install.
        script = "".join([
            'set -e; ',
            'if command -v apt-get >/dev/null 2>&1; then ',
            sudo_guard,
            f'  sudo -n env DEBIAN_FRONTEND=noninteractive apt-get update -qq && sudo -n env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {apt_pkgs}; ',
            'elif command -v pacman >/dev/null 2>&1; then ',
            sudo_guard,
            f'  sudo -n pacman -Sy --needed --noconfirm {pac_pkgs}; ',
            'elif command -v dnf >/dev/null 2>&1; then ',
            sudo_guard,
            f'  sudo -n dnf install -y {dnf_pkgs}; ',
            'elif command -v brew >/dev/null 2>&1; then ',
            f'  {brew_step}; ',
            'else ',
            '  echo "ERROR: no supported package manager (apt/pacman/dnf/brew) on this target." >&2; exit 3; fi',
        ])
        try:
            if host:
                argv = _ssh_base_argv(host, ssh_port) + [script]
            else:
                argv = ["bash", "-lc", script]
        except ValueError as e:
            raise HTTPException(400, str(e))
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        try:
            out, err = await asyncio.wait_for(proc.communicate(), timeout=180)
        except asyncio.TimeoutError:
            # Don't leave the child running (and unreaped) after giving up.
            # NOTE(review): for SSH targets this ends the local ssh client;
            # whether the remote install stops too is not guaranteed.
            if proc.returncode is None:
                try:
                    proc.kill()
                except ProcessLookupError:
                    pass
                await proc.wait()
            return {"ok": False, "error": "Install timed out after 180s"}
        ok = (proc.returncode == 0)
        # Combine stderr + (last lines of stdout) into a single error
        # blob when ok=False — some package managers print useful failure
        # context to stdout, and a script that exits via `echo ...; exit N`
        # without `>&2` would otherwise hand back an empty error string
        # and force the frontend to show a bare "HTTP 200".
        err_txt = err.decode("utf-8", errors="replace").strip()
        out_txt = out.decode("utf-8", errors="replace").strip()
        if not ok:
            tail_out = out_txt[-500:] if out_txt else ""
            combined = err_txt or tail_out or f"exit code {proc.returncode}"
        else:
            combined = None
        return {
            "ok": ok,
            "exit_code": proc.returncode,
            "output": out_txt[-1000:],
            "error": combined,
        }
    @router.post("/api/cookbook/rebuild-engine")
    async def rebuild_engine(request: Request):
        """Clear the cached llama.cpp build so the next serve recompiles.
@@ -1657,8 +1308,7 @@ def setup_shell_routes() -> APIRouter:
            return {"ok": False, "error": f"Unsupported engine: {engine}"}
        host = str(body.get("remote_host") or "").strip()
        ssh_port = body.get("ssh_port")
-        update_source = bool(body.get("update_source"))
+        cmd = _llama_cpp_rebuild_cmd()
        cmd = _llama_cpp_rebuild_cmd(update_source=update_source)
        try:
            argv = (
                (_ssh_base_argv(host, ssh_port) + [cmd])
@@ -11,7 +11,6 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from core.database import SessionLocal, ScheduledTask, TaskRun
 from core.middleware import INTERNAL_TOOL_USER
 from core.constants import internal_api_base
 from src.auth_helpers import get_current_user
 from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR
@@ -428,7 +427,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
        # In-process tool-loopback marker — AuthMiddleware validated
        # the internal token + loopback client before stamping this,
        # so treat as admin-equivalent.
-        if user == INTERNAL_TOOL_USER:
+        if user == "internal-tool":
            return True
        try:
            from core.auth import AuthManager
@@ -3,16 +3,11 @@ import os
 import time
 import json
 import asyncio
 import shutil
 import uuid
 from pathlib import Path
 from fastapi import APIRouter, Request, File, UploadFile, HTTPException
 from typing import List
 import logging
 from core.middleware import require_admin
-from core.database import SessionLocal, GalleryImage
+from src.auth_helpers import get_current_user
 from src.auth_helpers import effective_user
 from src.constants import GENERATED_IMAGES_DIR
 from src.upload_handler import count_recent_uploads
 logger = logging.getLogger(__name__)
@@ -56,69 +51,6 @@ def setup_upload_routes(upload_handler):
        raise HTTPException(404, "File not found")
    def _promote_chat_image_to_gallery(meta: dict, owner: str | None) -> str | None:
        """Make chat-uploaded images visible in Gallery without changing chat storage.

        The stored upload is copied (never moved) into ``GENERATED_IMAGES_DIR``
        under a fresh random filename and a ``GalleryImage`` row is recorded for
        it. When an active gallery row with the same file hash already exists
        (restricted to ``owner`` when one is given), that row is reused and no
        copy is made.

        Args:
            meta: Upload metadata produced by ``upload_handler.save_upload``.
                The keys read here are ``name``, ``mime``, ``path``, ``hash``,
                ``width``, ``height`` and ``size``.
            owner: Username stored on the gallery row, or ``None``.

        Returns:
            The id of the new or pre-existing gallery image, or ``None`` when
            the upload is not an image, its file is missing, the handler has no
            ``is_image_file`` hook, or the promotion failed.
        """
        is_image_file = getattr(upload_handler, "is_image_file", None)
        if not callable(is_image_file):
            return None
        if not is_image_file(meta.get("name", ""), meta.get("mime", "")):
            return None

        source_path = meta.get("path")
        if not source_path or not os.path.isfile(source_path):
            return None

        db = SessionLocal()
        # Set only once we are about to create the gallery copy, so the error
        # path knows whether there is a file of ours to clean up.
        dest_path = None
        try:
            file_hash = meta.get("hash")
            if file_hash:
                q = db.query(GalleryImage).filter(
                    GalleryImage.file_hash == file_hash,
                    GalleryImage.is_active == True,  # noqa: E712
                )
                if owner:
                    q = q.filter(GalleryImage.owner == owner)
                existing = q.first()
                if existing:
                    return existing.id

            image_dir = Path(GENERATED_IMAGES_DIR)
            image_dir.mkdir(parents=True, exist_ok=True)

            # Prefer the original extension; fall back to the MIME type, then
            # to ".png" when neither identifies a supported image format.
            ext = Path(meta.get("name") or source_path).suffix.lower()
            if ext not in {".png", ".jpg", ".jpeg", ".webp", ".gif"}:
                mime_ext = {
                    "image/png": ".png",
                    "image/jpeg": ".jpg",
                    "image/jpg": ".jpg",
                    "image/webp": ".webp",
                    "image/gif": ".gif",
                }.get(meta.get("mime", ""))
                ext = mime_ext or ".png"

            filename = f"{uuid.uuid4().hex[:12]}{ext}"
            dest_path = image_dir / filename
            shutil.copy2(source_path, dest_path)

            image_id = str(uuid.uuid4())
            db.add(GalleryImage(
                id=image_id,
                filename=filename,
                prompt=meta.get("name") or "Chat upload",
                model="chat-upload",
                owner=owner,
                file_hash=file_hash,
                width=meta.get("width"),
                height=meta.get("height"),
                file_size=meta.get("size"),
            ))
            db.commit()
            return image_id
        except Exception as e:
            db.rollback()
            # The row was not committed, so a (possibly partial) copy in the
            # gallery directory would be an orphan nothing references.
            if dest_path is not None:
                try:
                    dest_path.unlink(missing_ok=True)
                except OSError as cleanup_err:
                    logger.warning(
                        "Failed to remove orphaned gallery copy %s: %s",
                        dest_path, cleanup_err,
                    )
            logger.warning("Failed to add chat image upload to gallery: %s", e)
            return None
        finally:
            db.close()
    @router.post("")
    async def api_upload(request: Request, files: List[UploadFile] = File(...)):
        """Upload files with enhanced security and organization."""
@@ -146,10 +78,8 @@ def setup_upload_routes(upload_handler):
        for u in files:
            try:
-                owner = effective_user(request)
+                meta = upload_handler.save_upload(u, client_ip, owner=get_current_user(request))
-                meta = upload_handler.save_upload(u, client_ip, owner=owner)
+                out.append({
                gallery_id = _promote_chat_image_to_gallery(meta, owner)
                item = {
                    "id": meta["id"],
                    "name": meta["name"],
                    "mime": meta["mime"],
@@ -159,10 +89,7 @@ def setup_upload_routes(upload_handler):
                    "width": meta.get("width"),
                    "height": meta.get("height"),
                    "is_duplicate": meta.get("is_duplicate", False)
-                }
+                })
                if gallery_id:
                    item["gallery_id"] = gallery_id
                out.append(item)
            except HTTPException:
                raise
            except Exception as e:
@@ -211,7 +138,7 @@ def setup_upload_routes(upload_handler):
                original_name = info.get("name", file_id)
        auth_mgr = getattr(request.app.state, "auth_manager", None)
        auth_configured = bool(auth_mgr and auth_mgr.is_configured)
-        current_user = effective_user(request)
+        current_user = get_current_user(request)
        file_owner = info.get("owner") if info else None
        if auth_configured:
            if not current_user:
@@ -277,7 +204,7 @@ def setup_upload_routes(upload_handler):
        info = _load_upload_info(file_id)
        auth_mgr = getattr(request.app.state, "auth_manager", None)
        auth_configured = bool(auth_mgr and auth_mgr.is_configured)
-        current_user = effective_user(request)
+        current_user = get_current_user(request)
        file_owner = info.get("owner") if info else None
        if auth_configured:
            if not current_user:
@@ -320,7 +247,7 @@ def setup_upload_routes(upload_handler):
            raise HTTPException(404, "File not found")
        auth_mgr = getattr(request.app.state, "auth_manager", None)
        auth_configured = bool(auth_mgr and auth_mgr.is_configured)
-        current_user = effective_user(request)
+        current_user = get_current_user(request)
        file_owner = info.get("owner")
        if auth_configured:
            if not current_user:
@@ -1,5 +1,6 @@
 """Webhook, API Token, and sync chat routes."""
 import asyncio
 import uuid
 import logging
 from typing import Optional
@@ -384,10 +385,10 @@ def setup_webhook_routes(
        sess.add_message(ChatMessage("assistant", reply))
        session_manager.save_sessions()
-        webhook_manager.fire_and_forget("chat.completed", {
+        asyncio.create_task(webhook_manager.fire("chat.completed", {
            "session_id": session_id, "model": sess.model,
            "user_message": message[:2000], "response": reply[:2000],
-        })
+        }))
        return {"response": reply, "session_id": session_id, "model": sess.model}
@@ -103,13 +103,9 @@ def cmd_list(args) -> None:
    end = _parse_dt(args.end) if args.end else (start + timedelta(days=30))
    db = SessionLocal()
    try:
        # Overlap semantics, matching the web route (routes/calendar_routes.py)
        # and the recurring-expansion contract: an event is in the window when
        # it starts before the window end AND ends after the window start. This
        # includes multi-day / in-progress events that began before `start`.
        q = db.query(CalendarEvent).filter(
            CalendarEvent.dtstart >= start,
            CalendarEvent.dtstart < end,
            CalendarEvent.dtend > start,
        )
        if args.calendar:
            cal = db.query(CalendarCal).filter(CalendarCal.name == args.calendar).first()
@@ -19,10 +19,6 @@ GPU_BANDWIDTH = {
    "6950 xt": 576, "6900 xt": 512, "6800 xt": 512, "6800": 512, "6700 xt": 384, "6600 xt": 256, "6600": 224,
    "mi300x": 5300, "mi300": 5300, "mi250x": 3277, "mi250": 3277, "mi210": 1638, "mi100": 1229,
    "9070 xt": 624, "9070": 488, "9060 xt": 322, "9060": 322,
    # NVIDIA GB10 Grace-Blackwell superchip (DGX Spark). Unified LPDDR5X memory,
    # not Apple Silicon, so it lives in the generic GPU table — the Apple-only
    # lookup never matches it (its name carries no "apple").
    "gb10": 273,
 }
 # Pre-sort keys by length descending for correct substring matching
@@ -130,44 +126,6 @@ def _lookup_bandwidth(system):
    return None
 def _canonical_cpu_backend(system):
    """Return the canonical CPU backend for cpu_only speed estimation.
    Normalizes CPU-architecture aliases separately from the GPU backend, and
    overrides GPU-only backends (CUDA/ROCm/Metal) so they do not inherit a
    discrete-GPU fallback constant when the model is actually running on CPU.
    """
    backend = (system.get("backend") or "").lower().strip()
    cpu_arch = (system.get("cpu_arch") or "").lower().strip()
    cpu_name = (system.get("cpu_name") or "").lower()
    gpu_name = (system.get("gpu_name") or "").lower()
    # Already-canonical CPU backends
    if backend in ("cpu_x86", "cpu_arm"):
        return backend
    # Raw CPU-architecture aliases. Treat plain "arm" as 32-bit ARM, not the
    # ARM64-class CPU fallback used for Apple Silicon/aarch64 machines.
    if backend in ("x86_64", "amd64", "i386", "i686"):
        return "cpu_x86"
    if backend in ("arm64", "aarch64"):
        return "cpu_arm"
    # Prefer an explicit CPU architecture field when present
    if cpu_arch:
        if cpu_arch in ("x86_64", "amd64", "x86", "i386", "i686"):
            return "cpu_x86"
        if cpu_arch in ("arm64", "aarch64"):
            return "cpu_arm"
    # Apple Silicon enters ranking as backend="metal"; its CPU path is ARM.
    if backend in ("metal", "mps", "apple") or "apple" in cpu_name or "apple" in gpu_name:
        return "cpu_arm"
    # Conservative default for CUDA/ROCm/discrete GPU backends and unknowns.
    return "cpu_x86"
 def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
    """Estimate tok/s. Uses active params for MoE (only active experts run per token).
@@ -185,11 +143,6 @@ def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
    bw = _lookup_bandwidth(system)
    backend = system.get("backend", "cpu_x86")
    # CPU-only inference must never inherit a GPU backend's fallback constant,
    # even if the detected system happens to report a CUDA/Metal/ROCm backend.
    if run_mode == "cpu_only":
        backend = _canonical_cpu_backend(system)
    if bw and run_mode in ("gpu", "cpu_offload"):
        bpp = QUANT_BYTES_PER_PARAM.get(quant, 0.5)
        model_gb = pb * bpp
@@ -282,17 +282,7 @@ def _detect_amd():
            "gpus": cards,
            "gpu_groups": groups,
            "homogeneous": len(groups) <= 1,
-            # Pick the actual runtime label: ROCm/HIP only when its
+            "backend": "rocm",
            # toolchain is installed, otherwise Vulkan if vulkaninfo is
            # present (mesa RADV works fine on RDNA/CDNA when ROCm
            # packages are absent — see Strix Halo where ROCm support
            # is still backporting). Reporting "rocm" on a Vulkan-only
            # host misleads downstream env-var pinning
            # (HIP_VISIBLE_DEVICES is a no-op there).
            "backend": (
                "rocm" if (_run(["which", "rocminfo"]) or _run(["which", "hipconfig"]))
                else ("vulkan" if _run(["which", "vulkaninfo"]) else "rocm")
            ),
            "unified_memory": is_apu,
            # AMD ISA/family so downstream can tell datacenter Instinct (CDNA,
            # where vLLM/SGLang run AWQ/GPTQ reliably) from consumer Radeon
@@ -330,7 +320,7 @@ def _detect_apple_silicon():
    # Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel
    # Macs fall through to the CPU path.
-    if _canonical_cpu_arch(arch) != "arm64":
+    if "arm" not in arch and "aarch64" not in arch:
        return None
    # Chip name, e.g. "Apple M4 Max" — carries the Pro/Max/Ultra variant that
@@ -513,25 +503,6 @@ def _get_cpu_count():
    return os.cpu_count() or 1
 def _canonical_cpu_arch(value):
    arch = str(value or "").lower().strip().replace("-", "_")
    if arch in ("x86_64", "amd64", "x64"):
        return "x86_64"
    if arch in ("i386", "i686", "x86"):
        return "x86"
    if arch in ("arm64", "aarch64"):
        return "arm64"
    if arch == "arm" or arch.startswith("armv"):
        return "arm"
    return arch
 def _get_cpu_arch():
    """Return the canonical CPU architecture of the system being inspected.

    When ``_remote_host`` is set, the raw value is the output of ``uname -m``
    obtained through ``_run`` (an empty string if that yields nothing);
    otherwise it is ``platform.machine()``. Either way the raw value is
    normalized by ``_canonical_cpu_arch``.
    """
    if _remote_host:
        raw_arch = _run(["uname", "-m"]) or ""
    else:
        raw_arch = platform.machine()
    return _canonical_cpu_arch(raw_arch)
 def _powershell_exe():
    """Pick the best PowerShell executable for LOCAL execution: prefer pwsh
    (PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute
@@ -557,7 +528,6 @@ def _detect_windows():
        $r.cpu_name = $cpu.Name
        $r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum
        $r.arch = $cpu.AddressWidth
        $r.cpu_arch = if ($env:PROCESSOR_ARCHITEW6432) { $env:PROCESSOR_ARCHITEW6432 } else { $env:PROCESSOR_ARCHITECTURE }
        # GPU detection via nvidia-smi (fastest) or WMI fallback
        try { 
            $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null
@@ -629,7 +599,6 @@ def _detect_windows():
            "available_ram_gb": d.get("avail_gb", 0),
            "cpu_cores": _as_int(d.get("cpu_cores"), 1),
            "cpu_name": _cpu_name,
            "cpu_arch": _canonical_cpu_arch(d.get("cpu_arch")),
            "has_gpu": bool(d.get("gpu_name")),
            "gpu_name": d.get("gpu_name"),
            "gpu_vram_gb": d.get("gpu_vram_gb"),
@@ -825,7 +794,6 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
    available_ram = round(_get_available_ram_gb(), 1)
    cpu_cores = _get_cpu_count()
    cpu_name = _get_cpu_name()
    cpu_arch = _get_cpu_arch()
    gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd()
@@ -835,7 +803,6 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "cpu_arch": cpu_arch,
            "has_gpu": True,
            "gpu_name": gpu_info["gpu_name"],
            "gpu_vram_gb": gpu_info["gpu_vram_gb"],
@@ -850,13 +817,17 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
            "unified_memory": gpu_info.get("unified_memory", False),
        }
    else:
-        backend = "cpu_arm" if cpu_arch == "arm64" else "cpu_x86"
+        if _remote_host:
            arch_out = _run(["uname", "-m"]) or ""
        else:
            import platform as _platform
            arch_out = _platform.machine().lower()
        backend = "cpu_arm" if "aarch64" in arch_out or "arm" in arch_out else "cpu_x86"
        result = {
            "total_ram_gb": total_ram,
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "cpu_arch": cpu_arch,
            "has_gpu": False,
            "gpu_name": None,
            "gpu_vram_gb": None,
@@ -15,8 +15,6 @@ from urllib.parse import urljoin, urlparse
 import httpx
 from bs4 import BeautifulSoup
 from src.constants import WEB_FETCH_SOFT_MAX_BYTES, WEB_FETCH_HARD_MAX_BYTES, WEB_FETCH_USER_AGENT
 from .analytics import RateLimitError, error_logger
 from .cache import (
    CONTENT_CACHE_DIR,
@@ -91,128 +89,18 @@ def _public_http_url(url: str) -> bool:
        return False
-class BodyTooLargeError(Exception):
+def _get_public_url(url: str, headers: dict, timeout: int, max_redirects: int = 5) -> httpx.Response:
    """The server declared a body larger than the hard fetch ceiling."""
    def __init__(self, url: str, declared_bytes: int):
        self.url = url
        self.declared_bytes = declared_bytes
        super().__init__(
            f"response body is {declared_bytes:,} bytes, over the "
            f"{WEB_FETCH_HARD_MAX_BYTES:,}-byte hard cap"
        )
 class _CappedFetch:
    """Result of a size-capped streaming GET.
    Carries just what fetch_webpage_content needs from an httpx.Response,
    plus the cap bookkeeping: the (possibly truncated) body, whether the
    cap cut it short, and the size the server declared via Content-Length
    (wire bytes; None when absent).
    """
    __slots__ = ("status_code", "headers", "content", "truncated",
                 "declared_bytes", "encoding", "url")
    def __init__(self, status_code, headers, content, truncated,
                 declared_bytes, encoding, url):
        self.status_code = status_code
        self.headers = headers
        self.content = content
        self.truncated = truncated
        self.declared_bytes = declared_bytes
        self.encoding = encoding
        self.url = url
    @property
    def text(self) -> str:
        return self.content.decode(self.encoding or "utf-8", errors="replace")
    def raise_for_status(self):
        if self.status_code >= 400:
            request = httpx.Request("GET", self.url)
            raise httpx.HTTPStatusError(
                f"HTTP {self.status_code} for {self.url}",
                request=request,
                response=httpx.Response(self.status_code, request=request),
            )
 def _get_public_url(url: str, headers: dict, timeout: int, max_redirects: int = 5,
                    max_bytes: int = None) -> "_CappedFetch":
    """Capped streaming GET with SSRF-guarded manual redirects.
    The body is streamed and buffering stops at ``max_bytes`` (default: the
    soft cap), so an oversized resource cannot be pulled into memory or the
    content cache in full. When Content-Length already declares a body over
    the hard ceiling, the fetch is refused before any body bytes are read.
    """
    cap = min(max_bytes or WEB_FETCH_SOFT_MAX_BYTES, WEB_FETCH_HARD_MAX_BYTES)
    current = url
    for _ in range(max_redirects + 1):
        if not _public_http_url(current):
            raise httpx.RequestError("Blocked private/internal URL", request=httpx.Request("GET", current))
-        # Force identity transfer-encoding. With gzip/deflate the wire bytes
+        response = httpx.get(current, headers=headers, timeout=timeout, follow_redirects=False)
-        # (and Content-Length) can be a small fraction of the decoded body, so
+        if response.status_code not in (301, 302, 303, 307, 308):
-        # a tiny compressed response could pass the hard-cap preflight and then
+            return response
        # expand past the ceiling in a single decoded chunk before the streamed
        # cap below can slice it. Identity makes Content-Length the true body
        # size and keeps each streamed chunk bounded by the network read.
        req_headers = dict(headers or {})
        req_headers["Accept-Encoding"] = "identity"
        with httpx.stream("GET", current, headers=req_headers, timeout=timeout,
                          follow_redirects=False) as response:
            if response.status_code in (301, 302, 303, 307, 308):
        location = response.headers.get("location")
        if not location:
-                    return _CappedFetch(response.status_code, response.headers, b"",
+            return response
                                        False, None, response.encoding, str(response.url))
        current = urljoin(str(response.url), location)
                continue
            # A server can ignore the identity request and still return a
            # compressed body; httpx.iter_bytes would then decode it, and a tiny
            # gzip can balloon into one decoded chunk far past the cap before we
            # slice. Refuse a compressed Content-Encoding so the streamed cap
            # stays a real memory bound (Content-Length is the compressed wire
            # length here, so the preflight and size metadata are unreliable too).
            enc = (response.headers.get("content-encoding") or "").strip().lower()
            if enc and enc != "identity":
                raise httpx.RequestError(
                    f"Refusing compressed response (Content-Encoding: {enc}) after "
                    "requesting identity: cannot bound decoded body size",
                    request=httpx.Request("GET", current),
                )
            declared = None
            raw_len = response.headers.get("content-length")
            if raw_len and raw_len.isdigit():
                declared = int(raw_len)
            # Refuse before buffering anything when the server already tells
            # us the body exceeds the absolute ceiling (Content-Length is wire
            # bytes; the decompressed body can only be larger).
            if declared is not None and declared > WEB_FETCH_HARD_MAX_BYTES:
                raise BodyTooLargeError(current, declared)
            chunks = []
            read = 0
            truncated = False
            # We requested identity above, so iter_bytes yields the raw body in
            # network-read-sized chunks (no decompression expansion); the cap
            # therefore bounds what we actually buffer.
            for chunk in response.iter_bytes():
                read += len(chunk)
                if read > cap:
                    keep = cap - (read - len(chunk))
                    if keep > 0:
                        chunks.append(chunk[:keep])
                    truncated = True
                    break
                chunks.append(chunk)
            return _CappedFetch(response.status_code, response.headers,
                                b"".join(chunks), truncated, declared,
                                response.encoding, str(response.url))
    raise httpx.RequestError("Too many redirects", request=httpx.Request("GET", current))
 # PDF extraction (optional dependency)
@@ -334,19 +222,9 @@ def _empty_result(url: str, error: str = "") -> dict:
 # ----------------------------------------------------------------------
 # Main content fetcher
 # ----------------------------------------------------------------------
-def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
+def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0) -> dict:
-                          max_bytes: int = None) -> dict:
+    """Fetch and extract meaningful content from a webpage with caching."""
-    """Fetch and extract meaningful content from a webpage with caching.
+    cache_key = generate_cache_key(url)
    ``max_bytes`` raises the download budget per call (clamped to the hard
    cap); the default is the soft cap. When the body is cut short the result
    carries ``truncated``/``fetched_bytes``/``total_bytes`` so callers can
    tell the model the content is partial (#3812).
    """
    effective_cap = min(max_bytes or WEB_FETCH_SOFT_MAX_BYTES, WEB_FETCH_HARD_MAX_BYTES)
    # The cap is part of the cache identity: a truncated soft-cap fetch must
    # not be served to a later full-budget request for the same URL.
    cache_key = generate_cache_key(f"{url}#cap={effective_cap}")
    cache_file = CONTENT_CACHE_DIR / f"{cache_key}.cache"
    # Check cache
@@ -369,24 +247,18 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
    # Fetch
    try:
        headers = {
-            "User-Agent": WEB_FETCH_USER_AGENT,
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
-            # identity so the streamed size cap in _get_public_url stays honest
+            "Accept-Encoding": "gzip, deflate",
            # (a compressed body can decode to far more than Content-Length).
            "Accept-Encoding": "identity",
            "Connection": "keep-alive",
        }
-        response = _get_public_url(url, headers=headers, timeout=timeout,
+        response = _get_public_url(url, headers=headers, timeout=timeout)
                                   max_bytes=effective_cap)
        if response.status_code == 429:
            raise RateLimitError(f"Rate limit hit for {url} (attempt {retry_attempt})")
        response.raise_for_status()
    except BodyTooLargeError as e:
        error_logger.warning(f"Refused oversized body for {url}: {e}")
        return _empty_result(url, f"TooLarge: {e}")
    except httpx.HTTPStatusError as e:
        error_logger.warning(f"HTTP {e.response.status_code} fetching {url}: {e}")
        return _empty_result(url, f"HTTP {e.response.status_code}: {e}")
@@ -397,27 +269,9 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
        error_logger.error(str(e))
        return _empty_result(url, str(e))
    # Size bookkeeping shared by every content branch below. getattr keeps
    # plain httpx.Response stand-ins (tests) working without the cap fields.
    _size_fields = {
        "truncated": getattr(response, "truncated", False),
        "fetched_bytes": len(response.content),
        "total_bytes": getattr(response, "declared_bytes", None),
    }
    # PDF handling
    content_type = response.headers.get("Content-Type", "").lower()
    if "application/pdf" in content_type or url.lower().endswith(".pdf"):
        if _size_fields["truncated"]:
            # A PDF cut mid-stream is not parseable; unlike text there is no
            # useful partial result, so report the budget problem instead.
            _declared = _size_fields["total_bytes"]
            return _empty_result(
                url,
                f"TooLarge: PDF exceeds the {effective_cap:,}-byte fetch budget"
                + (f" (size {_declared:,} bytes)" if _declared else "")
                + "; retry with a larger budget if it fits under the hard cap",
            )
        if pdf_extract_text is None:
            logger.error("pdfminer.six is not installed; cannot extract PDF text.")
            pdf_text = ""
@@ -441,7 +295,6 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
            "js_message": "",
            "success": bool(pdf_text),
            "error": "" if pdf_text else "Failed to extract PDF text",
            **_size_fields,
        }
        _cache_result(cache_file, cache_key, result, url)
        return result
@@ -476,7 +329,6 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
            "js_message": "",
            "success": bool(text_body),
            "error": "" if text_body else "Empty response body",
            **_size_fields,
        }
        _cache_result(cache_file, cache_key, result, url)
        return result
@@ -539,7 +391,6 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0,
        "js_message": js_message,
        "success": True,
        "error": "",
        **_size_fields,
    }
    _cache_result(cache_file, cache_key, result, url)
    return result
@@ -9,12 +9,14 @@ from urllib.parse import urljoin, urlparse, parse_qs
 import httpx
 from bs4 import BeautifulSoup
-from src.constants import SEARXNG_INSTANCE, REQUEST_TIMEOUT, WEB_FETCH_USER_AGENT
+from src.constants import SEARXNG_INSTANCE
 from .analytics import RateLimitError, error_logger
 from .query import build_enhanced_query
 logger = logging.getLogger(__name__)
 REQUEST_TIMEOUT = 20
 # Provider registry — maps setting value to (label, needs_key, needs_url)
 PROVIDER_INFO = {
    "searxng":  ("SearXNG",           False, True),
@@ -138,7 +140,7 @@ def searxng_search_api(query: str, count: Optional[int] = None, categories: str
    count = count if count is not None else _get_result_count()
    instance = _get_search_instance()
    api_key = ""
-    headers = {"User-Agent": WEB_FETCH_USER_AGENT}
+    headers = {"User-Agent": "Mozilla/5.0"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    # News/fresh queries do badly in the 'general' category — it favours
@@ -250,7 +252,7 @@ def searxng_search(query, max_results=10):
    """Search using SearXNG instance - parsing HTML."""
    instance = _get_search_instance()
    api_key = ""
-    req_headers = {"User-Agent": WEB_FETCH_USER_AGENT}
+    req_headers = {"User-Agent": "Mozilla/5.0"}
    if api_key:
        req_headers["Authorization"] = f"Bearer {api_key}"
    try:
@@ -389,7 +391,7 @@ def duckduckgo_search(query: str, count: Optional[int] = None, time_filter: Opti
            response = httpx.get(
                "https://html.duckduckgo.com/html/",
                params={"q": query, "kp": _safesearch_for("duckduckgo_html")},
-                headers={"User-Agent": WEB_FETCH_USER_AGENT},
+                headers={"User-Agent": "Mozilla/5.0"},
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
@@ -16,9 +16,8 @@ sys.path.insert(0, BASE_DIR)
 from src.constants import (
    DATA_DIR, AUTH_FILE, UPLOAD_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR,
    TTS_CACHE_DIR, GENERATED_IMAGES_DIR, DEEP_RESEARCH_DIR, CHROMA_DIR,
-    RAG_DIR, MEMORY_VECTORS_DIR, PASSWORD_MIN_LENGTH,
+    RAG_DIR, MEMORY_VECTORS_DIR,
 )
 from core.auth import RESERVED_USERNAMES
 DIRS = [
    DATA_DIR,
@@ -60,23 +59,15 @@ def _prompt_admin_credentials():
    print("  (Press Enter to accept defaults)")
    print()
    while True:
    username = input("  Username [admin]: ").strip().lower()
    if not username:
        username = "admin"
        if username in RESERVED_USERNAMES:
            print(f"  '{username}' is a reserved username. Choose another.")
            continue
        break
    while True:
        password = getpass.getpass("  Password: ")
        if not password:
            print("  Password cannot be empty.")
            continue
        if len(password) < PASSWORD_MIN_LENGTH:
            print(f"  Password must be at least {PASSWORD_MIN_LENGTH} characters.")
            continue
        confirm = getpass.getpass("  Confirm password: ")
        if password != confirm:
            print("  Passwords don't match. Try again.")
@@ -102,13 +93,8 @@ def create_default_admin():
        password = os.getenv("ODYSSEUS_ADMIN_PASSWORD", "").strip()
        if username and password:
-            # Both provided via env — validate before using
+            # Both provided via env — use them directly
-            if username in RESERVED_USERNAMES:
+            pass
                print(f"  [error] ODYSSEUS_ADMIN_USER '{username}' is a reserved username")
                return "failed"
            if len(password) < PASSWORD_MIN_LENGTH:
                print(f"  [error] ODYSSEUS_ADMIN_PASSWORD must be at least {PASSWORD_MIN_LENGTH} characters")
                return "failed"
        elif sys.stdin.isatty() and not os.getenv("ODYSSEUS_SKIP_ADMIN_PROMPT"):
            # Interactive terminal — ask the user
            username, password = _prompt_admin_credentials()
@@ -1,412 +0,0 @@
 # Architecture Runtime Inventory
 > **Purpose**: Phase 0 planning baseline for codebase readability improvements (#4071).
 > **Parent issue**: [#4082](https://github.com/pewdiepie-archdaemon/odysseus/issues/4082)
 > **Last updated**: dev@b58af42 | 2026-06-16
 > **Status**: Draft — to be reviewed before follow-up slices open.
 > **Snapshot basis**: Importer / file / import-line counts are refreshed to `dev@b58af42` (2026-06-16) and are recomputable via the commands in §3.4. **Line counts** in §2.1 / §2.2 are a snapshot from an earlier baseline and drift as `dev` moves — recompute any of them with `wc -l <file>`. This inventory tracks structure and risk, not live metrics.
 This document maps the current runtime module structure, identifies high-risk boundaries, and recommends safe first refactor slices. It does **not** move files, change imports, or alter runtime behavior.
 ---
 ## 1. Current Structure Overview
 ### 1.1 Top-Level Layout
 ```
 odysseus/
 ├── app.py                    # FastAPI app entrypoint (1,145 lines)
 ├── conf/                     # Configuration (config.py, settings.py, settings_scrub.py)
 ├── src/                      # 95 flat .py files + 2 subdirectories
 │   ├── agent_tools/          # Tool helpers: document, filesystem, subprocess, web
 │   └── search/               # Search subsystem
 ├── routes/                   # 54 flat .py files — HTTP route handlers
 ├── core/                     # 10 files — database models, auth, middleware, session
 ├── mcp_servers/              # 5 files — MCP server implementations
 ├── scripts/                  # CLI tools and one-shot scripts
 ├── static/                   # Frontend HTML/CSS/JS
 ├── tests/                    # 583 test files (~54,800 lines)
 └── services/                 # (exists as needed)
 ```
 ### 1.2 Directory Flatness Metric
 | Directory | Flat `.py` Files | Subdirectories | Concern |
 |-----------|-----------------|----------------|---------|
 | `src/` | **95** | 2 (`agent_tools/`, `search/`) | No domain grouping; 95 files in one directory |
 | `routes/` | **54** | 0 | All route handlers in one flat directory |
 | `core/` | 10 | 0 | Manageable, but `database.py` is oversized |
 ---
 ## 2. Largest Runtime Modules
 ### 2.1 Python Backend
 | Rank | File | Lines | Classes | Functions | Risk |
 |------|------|-------|---------|-----------|------|
 | 1 | `src/tool_implementations.py` | **4,032** | 0 | ~48 | **HIGH** |
 | 2 | `routes/email_routes.py` | **3,245** | — | — | **MEDIUM** |
 | 3 | `routes/cookbook_routes.py` | **2,969** | — | — | **MEDIUM** |
 | 4 | `src/agent_loop.py` | **2,961** | 0 | ~24 | **HIGH** |
 | 5 | `src/task_scheduler.py` | **2,330** | — | 5 | MEDIUM |
 | 6 | `routes/model_routes.py` | **2,266** | — | — | MEDIUM |
 | 7 | `core/database.py` | **2,265** | 28 | ~59 helpers | **HIGH** |
 | 8 | `src/builtin_actions.py` | **2,262** | 2 | ~24 | MEDIUM |
 | 9 | `src/llm_core.py` | **2,164** | — | — | MEDIUM |
 | 10 | `mcp_servers/email_server.py` | 2,197 | — | — | LOW (separate process) |
 | 11 | `src/visual_report.py` | 1,918 | — | — | LOW |
 | 12 | `routes/gallery_routes.py` | 1,896 | — | — | LOW |
 | 13 | `src/ai_interaction.py` | 1,846 | — | — | MEDIUM |
 | 14 | `routes/document_routes.py` | 1,717 | — | — | LOW |
 | 15 | `routes/skills_routes.py` | 1,648 | — | — | LOW |
 **Heuristic**: Files > 2,000 lines with 20+ public symbols and many importers are the highest-risk splits. Files 1,000–2,000 lines are medium-risk if tightly coupled.
 ### 2.2 Frontend
 | File | Lines | Concern |
 |------|-------|---------|
 | `static/style.css` | **36,653** | Entire app CSS in one file (tracked separately in #2617) |
 | `static/js/document.js` | **9,776** | Single JS file for document functionality |
 | `static/js/slashCommands.js` | 6,498 | |
 | `static/js/settings.js` | 5,266 | |
 | `static/js/emailLibrary.js` | 5,217 | |
 | `static/js/notes.js` | 5,124 | |
 | `static/js/chat.js` | 4,985 | |
 | `static/app.js` | 4,090 | |
 **Note**: Frontend modularization is tracked separately in #2617 (CSS) and is not the focus of this Phase 0 inventory. Frontend is listed here for completeness but follow-up slices should target Python backend boundaries first.
 ---
 ## 3. Import Dependency Graph
 ### 3.1 Who Depends on `core/database.py`
 **102 files** import from `core.database` — this is the most depended-upon module:
 - All route handlers (`routes/*.py`)
 - Most `src/*.py` files
 - `core/session_manager.py`, `core/auth.py`
 - Multiple test files
 **Implication**: Any split of `core/database.py` is the highest-risk refactor. It should be tackled **last**, never first.
 ### 3.2 Who Depends on `src/tool_implementations.py`
 **17 files** import from `src.tool_implementations`:
 - `src/agent_loop.py`, `src/builtin_actions.py`, `src/tool_index.py`
 - `src/task_scheduler.py`, `src/tool_policy.py`
 - Various tests
 ### 3.3 Who Depends on `src/agent_loop.py`
 **22 files** import from `src.agent_loop`:
 - `src/tool_policy.py`, `src/teacher_escalation.py`, `src/bg_monitor.py`
 - `src/task_scheduler.py`
 - Multiple test files
 ### 3.4 Cross-Layer Import Violations
 **`src/` importing from `routes/`** (backwards dependency — domain logic depending on HTTP layer):
 ```
 src/tool_implementations.py ──→ routes/calendar_routes.py
 src/tool_implementations.py ──→ routes/cookbook_helpers.py
 src/tool_implementations.py ──→ routes/email_helpers.py
 src/tool_implementations.py ──→ routes/email_pollers.py
 src/tool_implementations.py ──→ routes/email_routes.py
 src/tool_implementations.py ──→ routes/model_routes.py
 src/tool_implementations.py ──→ routes/note_routes.py
 src/tool_implementations.py ──→ routes/prefs_routes.py
 ```
 > These are **runtime imports** (inside function bodies, not at module top), which mitigates circular import risk but indicates fuzzy layer boundaries. Function-level inline imports from the HTTP layer into business logic are a code smell.
 **Import-line counts by direction** (every matching import line, including function-level imports):
 | Direction | Count | Notes |
 |-----------|-------|-------|
 | `routes/` → `src/` | **374** | Expected: HTTP handlers call domain logic |
 | `routes/` → `core/` | **126** | Expected: handlers access DB models |
 | `src/` → `routes/` | **31** | **Unexpected**: domain logic reaching into HTTP layer (direct grep of import lines referencing `routes/`) |
 | `src/` → `core/` | **106** | Acceptable but could be reduced with a data-access layer |
 > **How the metrics in this document are computed** — recompute against current `dev` before treating any count as authoritative (the tree drifts; these numbers are a snapshot, not a live value):
 > - `src/` flat `.py` files: `find src -maxdepth 1 -name '*.py' | wc -l`
 > - `tests/` test files: `find tests -name 'test_*.py' | wc -l`
 > - `core.database` importers: `grep -rlE '(from|import) +core\.database' --include='*.py' . | grep -v core/database.py | wc -l`
 > - `src.agent_loop` importers: `grep -rlE '(from|import) +src\.agent_loop' --include='*.py' . | grep -v src/agent_loop.py | wc -l`
 > - Cross-layer import lines: `grep -rhE '(from|import) +<pkg>' --include='*.py' <dir>/ | wc -l` (e.g. `(from|import) +routes` over `src/`)
 ---
 ## 4. Route Ownership Map
 Routes can be grouped into logical feature domains. Current flat structure obscures these boundaries:
 | Domain | Route Files | Total Lines | Review Complexity |
 |--------|-------------|-------------|-------------------|
 | **Email** | `email_routes.py`, `email_helpers.py`, `email_pollers.py` | 5,936 | HIGH — most complex domain |
 | **Chat / Agent** | `chat_routes.py`, `chat_helpers.py`, `shell_routes.py`, `codex_routes.py`, `skills_routes.py` | 6,365 | HIGH — core interaction surface |
 | **Cookbook** | `cookbook_routes.py`, `cookbook_helpers.py`, `cookbook_output.py` | 4,110 | MEDIUM |
 | **Model / LLM** | `model_routes.py`, `assistant_routes.py`, `copilot_routes.py` | 2,764 | MEDIUM |
 | **Calendar / Contacts** | `calendar_routes.py`, `contacts_routes.py` | 2,336 | MEDIUM |
 | **Documents** | `document_routes.py`, `document_helpers.py` | 1,954 | LOW |
 | **Auth** | `auth_routes.py`, `api_token_routes.py`, `device_flow.py` | 1,171 | LOW |
 | **Tasks** | `task_routes.py` (standalone) | 1,157 | LOW |
 | **Session** | `session_routes.py` (standalone) | 1,287 | LOW |
 | **Gallery** | `gallery_routes.py`, `gallery_helpers.py` | 1,896 | LOW |
 | **Memory** | `memory_routes.py` | — | LOW |
 | **Research** | `research_routes.py` | — | LOW |
 | **MCP** | `mcp_routes.py` | — | LOW |
 | **Notes** | `note_routes.py` | — | LOW |
 | **Other** | `prefs_routes.py`, `upload_routes.py`, `vault_routes.py`, `webhook_routes.py`, `workspace_routes.py`, `search_routes.py`, `history_routes.py`, `hwfit_routes.py`, `preset_routes.py`, `signature_routes.py`, `backup_routes.py`, `cleanup_routes.py`, `diagnostics_routes.py`, `embedding_routes.py`, `emoji_routes.py`, `font_routes.py`, `stt_routes.py`, `tts_routes.py`, `compare_routes.py`, `personal_routes.py`, `editor_draft_routes.py`, `admin_wipe_routes.py`, `chatgpt_subscription_routes.py` | 2,000+ | LOW individual, HIGH cumulative |
 ---
 ## 5. Tool Registry & Implementation Boundaries
 ### 5.1 Current Tool Architecture
 | Component | File | Lines | Role |
 |-----------|------|-------|------|
 | Tool schemas | `src/tool_schemas.py` | 1,392 | JSON Schema tool definitions (Duck-TypedDict) |
 | Tool index | `src/tool_index.py` | 542 | RAG-based tool retrieval from ChromaDB |
 | Tool implementations | `src/tool_implementations.py` | 4,032 | 33 `do_*` functions — all tool execution logic |
 | Tool security | `src/tool_security.py` | — | Owner-scoped tool blocking |
 | Tool policy | `src/tool_policy.py` | — | Guide-only directive, plan-mode disabled tools |
 | Tool utils | `src/tool_utils.py` | — | Shared tool helpers |
 ### 5.2 Tool Implementation Categories
 The 33 `do_*` functions in `tool_implementations.py` fall into natural domain groups — the basis for slice 1's split in §6.2:
 | Category | `do_*` functions | Count |
 |----------|------------------|-------|
 | **System / config** | `do_manage_skills`, `do_manage_tasks`, `do_manage_endpoints`, `do_manage_mcp`, `do_manage_webhooks`, `do_manage_tokens`, `do_manage_settings`, `do_api_call`, `do_app_api` | 9 |
 | **Cookbook / model serving** | `do_download_model`, `do_serve_model`, `do_list_served_models`, `do_stop_served_model`, `do_tail_serve_output`, `do_list_downloads`, `do_cancel_download`, `do_search_hf_models`, `do_adopt_served_model`, `do_list_cookbook_servers`, `do_list_serve_presets`, `do_serve_preset`, `do_list_cached_models` | 13 |
 | **Notes** | `do_manage_notes` | 1 |
 | **Calendar** | `do_manage_calendar` | 1 |
 | **Search** | `do_search_chats` | 1 |
 | **Research** | `do_manage_research`, `do_trigger_research` | 2 |
 | **Contacts** | `do_resolve_contact`, `do_manage_contact` | 2 |
 | **Vault** | `do_vault_search`, `do_vault_get`, `do_vault_unlock` | 3 |
 | **Image** | `do_edit_image` | 1 |
 | | **Total** | **33** |
 > Low-level tools (filesystem, subprocess, web fetch, document parsing) live in `src/agent_tools/`, **not** in `tool_implementations.py` — out of scope for this split.
 ---
 ## 6. Risk Assessment & Candidate Slice Ranking
 > **Candidate proposals, not a committed plan.** The rankings, package shapes (e.g. `src/pkg/`, `src/domain/`, `src/infra/`, `src/api/`), split ordering, and route-grouping strategy below are **options for maintainer discussion**. Per #4082/#4071, slice ownership and order are settled by maintainers before any follow-up PR. §1–§3 above are the factual current-state inventory.
 ### 6.1 Risk Scale
 | Level | Criteria |
 |-------|----------|
 | **LOW** | File has ≤3 importers AND ≤500 lines, OR is a pure refactor with clear boundaries |
 | **MEDIUM** | File has 4–15 importers OR 500–2,000 lines |
 | **HIGH** | File has 16+ importers OR >2,000 lines, OR has cross-layer import violations |
 ### 6.2 Ranked Split Candidates
 | Priority | Target | Risk | Rationale |
 |----------|--------|------|-----------|
 | **1** | `src/tool_implementations.py` → `src/tools/*.py` | **MEDIUM** | 4,032 lines → ~10 files by tool category. Already has natural boundaries. 17 importers, tracked in #3629. Use `__init__.py` shim to keep existing imports working. |
 | **2** | `routes/` → domain subdirectories (one domain per PR) | **MEDIUM** | 54 flat files. Done **one domain at a time** (e.g. a standalone PR for the email domain, then chat, …), not a broad reorganization — route modules carry helper imports, registration assumptions, and test import paths. |
 | **3** | `src/agent_loop.py` → `src/agent/loop.py` + submodules | **MEDIUM-HIGH** | 2,961 lines, 24 functions. Can extract prompt building, classification, verification, and runaway detection. Tracked in #3266. |
 | **4** | `src/` → `src/pkg/`, `src/domain/`, `src/infra/`, `src/api/` | **MEDIUM** | Structural reorganization. Split flat `src/` into layered packages. Must come after routes and tools are stable. |
 | **5** | `routes/email_*.py` consolidation | **LOW** | Already grouped by filename prefix. Low-risk cleanup within the email domain. |
 | **6** | `core/database.py` → `src/infra/database/models/*.py` | **HIGH** | 28 classes, 102 importers. Highest-risk split. Must be **last** in any sequence. Requires careful import shim strategy. |
 | **7** | Frontend CSS modularization | **MEDIUM** | 36,653 lines. Tracked in #2617. Separate timeline from backend work. |
 | **8** | Frontend JS modularization | **MEDIUM** | 9,776 lines in `document.js`. Introduce ES modules at minimum. |
 ### 6.3 Candidate First 3 Behavior-Preserving Slices
 **Slice 1: Split `tool_implementations.py`** (lowest-risk, high-impact)
 - Create `src/tools/` package with one file per tool category
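 - Keep `src/tool_implementations.py` as a thin compatibility shim that re-exports every symbol from `src/tools/` under its current name (`src/tools/__init__.py` may expose the same names), so existing `from src.tool_implementations import …` statements keep working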
 - Update 17 importers to use new paths (can be deferred via shim)
 - Validation: `python -m pytest tests/ -x -q` + manual smoke test of tool execution
 - Reference: #3629
 **Slice 2: Group `routes/` by domain** (one domain per PR, not a broad sweep)
 Route modules carry helper imports, router registration assumptions, and test import paths, so this must be done **one domain at a time** rather than as a single reorganization PR. Example sequence (each its own PR):
 - PR 2a: move the **email** domain (`email_routes.py`, `email_helpers.py`, `email_pollers.py`) → `routes/email/` + shim
 - PR 2b: move the **chat/agent** domain → `routes/chat/` + shim
 - PR 2c: move the **cookbook** domain → `routes/cookbook/` + shim
 - …and so on per domain from §4
 Each PR: add `__init__.py` re-exporting old names, update `app.py` router imports, and validate that `python app.py` starts cleanly. **No behavior change** — pure file reorganization.
 **Slice 3: Extract `agent_loop.py` submodules** (Improve reviewability)
 - Move prompt assembly → `src/agent/prompt.py`
 - Move request classification → `src/agent/classifier.py`
 - Move sub-agent verification → `src/agent/verifier.py`
 - Move runaway detection → `src/agent/runaway.py`
 - Move context management → `src/agent/context.py`
 - Keep `src/agent/loop.py` as the main orchestration module
 - Validation: `python -m pytest tests/test_agent_loop.py tests/test_loop_breaker_runaway.py -v`
 ---
 ## 7. Safety Guardrails for Follow-Up Work
 Per maintainer guidance in #4082 and #4071:
 - [ ] **One domain/slice per PR** — never mix multiple reorganizations
 - [ ] **No behavior changes** mixed with file moves — pure reorganization only
 - [ ] **Keep compatibility shims** — `__init__.py` re-exports for all existing import paths
 - [ ] **Add or identify focused tests** before risky splits
 - [ ] **Do not start with `core/database.py`** or broad route movement unless this inventory shows a safe boundary
 - [ ] **Prefer small, reviewable slices** over large restructures
 - [ ] **No packaging/runtime/tooling migration** mixed into file moves
 - [ ] **No frontend framework migration** inside this stabilization lane
 - [ ] **Validate with `python -m compileall`** — every PR must pass CI checks
 - [ ] **Validate with `pytest`** — run the full test suite before opening each PR
 ---
 ## 8. Validation Commands
 Each follow-up PR should be verifiable with these commands before submission:
 ```bash
 # Syntax check — must pass with zero errors
 python -m compileall src/ routes/ core/ conf/
 # Full test suite — must match baseline pass rate
 python -m pytest tests/ -x -q
 # Import shim verification — existing import paths must still work
 python -c "from src.tool_implementations import do_search_chats; print('OK')"
 # App startup smoke test (if backend touched)
 timeout 5 python app.py 2>&1 | head -5 || true
 ```
 ---
 ## 9. Open Questions
 1. Is `#2538` (specs ground truth) the canonical behavior map baseline, and should this inventory be kept in sync with those specs once merged?
 2. Should route grouping follow the domain map proposed here, or is there a different taxonomy preferred by maintainers?
 3. For the `tool_implementations.py` split (#3629), is the tool categorization in §5.2 acceptable, or should it follow a different grouping?
 4. Should compatibility shims (`__init__.py`) be temporary (removed in a follow-up wave) or permanent?
 5. Should an ADR (Architecture Decision Record) document be started to track decisions made during this process?
 ---
 ## 10. Future Direction (NOT current state)
 The following are **future refactor targets** (candidate directions **pending maintainer agreement**, not committed), recorded here so this inventory does not imply they exist today. None of them are present in the current `dev` tree:
 - `main.py` — proposed rename of the `app.py` entrypoint. Today the app boots via `app.py`.
 - `src/agent/` — proposed package to hold `agent_loop.py` submodules (prompt/classifier/verifier/runaway/context). Today `agent_loop.py` is a single flat file in `src/`.
 - `src/infra/`, `src/domain/`, `src/pkg/`, `src/api/` — proposed layered reorganization of the flat `src/` directory (slice 4 in §6).
 These become real only when the corresponding slices land.
 ---
 ## Appendix A: File Listing
 ### `src/` (95 files — 61 shown; run `ls src/*.py` for the full list)
 ```
 agent_loop.py          tool_implementations.py   tool_schemas.py
 tool_index.py          tool_security.py          tool_policy.py
 tool_utils.py          builtin_actions.py        task_scheduler.py
 llm_core.py            model_context.py          model_discovery.py
 session_search.py      context_budget.py         context_compactor.py
 ai_interaction.py      action_intents.py         agent_runs.py
 app_helpers.py         app_initializer.py        config.py
 database.py            memory.py                 memory_provider.py
 secret_storage.py      prompt_security.py        url_security.py
 url_safety.py          rate_limiter.py           cleanup_service.py
 readiness.py           service_health.py         exceptions.py
 request_models.py      assistant_log.py          bg_monitor.py
 builtin_mcp.py         chat_helpers.py           chroma_client.py
 document_processor.py  embedding_lanes.py        deep_research.py
 research_handler.py    research_utils.py         personal_docs.py
 rag_manager.py         rag_singleton.py          topic_analyzer.py
 visual_report.py       youtube_handler.py        pdf_forms.py
 pdf_form_doc.py        pdf_runtime.py            caldav_writeback.py
 email_thread_parser.py text_helpers.py           user_time.py
 teacher_escalation.py  cookbook_serve_lifecycle.py
 chatgpt_subscription.py  mcp_manager.py
 ```
 ### `routes/` (54 files)
 ```
 __init__.py    _validators.py
 auth_routes.py              api_token_routes.py       device_flow.py
 chat_routes.py              chat_helpers.py           shell_routes.py
 codex_routes.py             skills_routes.py
 email_routes.py             email_helpers.py          email_pollers.py
 cookbook_routes.py          cookbook_helpers.py       cookbook_output.py
 model_routes.py             assistant_routes.py       copilot_routes.py
 calendar_routes.py          contacts_routes.py
 document_routes.py          document_helpers.py
 gallery_routes.py           gallery_helpers.py
 task_routes.py              session_routes.py
 note_routes.py              memory_routes.py          research_routes.py
 mcp_routes.py               search_routes.py          history_routes.py
 webhook_routes.py           workspace_routes.py       upload_routes.py
 vault_routes.py             prefs_routes.py           preset_routes.py
 signature_routes.py         personal_routes.py        hwfit_routes.py
 backup_routes.py            cleanup_routes.py         diagnostics_routes.py
 embedding_routes.py         emoji_routes.py           font_routes.py
 stt_routes.py               tts_routes.py             compare_routes.py
 editor_draft_routes.py      chatgpt_subscription_routes.py    admin_wipe_routes.py
 ```
 ### `core/` (10 files)
 ```
 __init__.py    constants.py    database.py    models.py
 auth.py        middleware.py   session_manager.py   exceptions.py
 atomic_io.py   platform_compat.py
 ```
 ---
 ## Appendix B: Key Import Relationships
 ```
 core/database.py  ←── 102 importers (routes/*, src/*, core/*, tests/*)
    ↑
    ├── routes/auth_routes.py
    ├── routes/email_routes.py
    ├── src/builtin_actions.py
    ├── src/task_scheduler.py
    ├── src/tool_implementations.py (inline)
    └── ...97 more
 src/tool_implementations.py  ←── 17 importers
    ↑
    ├── src/agent_loop.py
    ├── src/builtin_actions.py
    ├── src/tool_index.py
    ├── src/task_scheduler.py
    ├── src/tool_policy.py
    └── ...12 more (mostly tests)
 src/agent_loop.py  ←── 22 importers
    ↑
    ├── src/tool_policy.py
    ├── src/teacher_escalation.py
    ├── src/bg_monitor.py
    ├── src/task_scheduler.py
    └── 18 more (incl. tests)
 ```
@@ -38,6 +38,167 @@ from src.agent_tools import (
 logger = logging.getLogger(__name__)
 # Redaction patterns for common secret-bearing shapes. Explicit and tested
 # (see tests/test_loop_guard_signals.py) rather than one clever broad regex —
 # safety first, but we try not to mangle harmless prose. Applied in order.
 # Placeholder text that stands in for every redacted value.
 _REDACTED = "[redacted]"
 # Cookie: ... / Set-Cookie: ... — redact the rest of the line (cookies hold spaces).
 # Group 1 captures the header name plus separator so a substitution can keep it;
 # matching is case-insensitive.
 _SENSITIVE_COOKIE_RE = re.compile(
    r"(?i)\b((?:set-)?cookie\s*[:=]\s*)[^\r\n]+"
 )
 # URL credentials, e.g. postgres://user:pass@host/db. The password half allows
 # inner colons (postgres://user:pa:ss@host/db) but still stops at / and @.
 # Group 1 captures the scheme and "://"; the match ends on the "@" separator.
 _SENSITIVE_URL_CRED_RE = re.compile(
    r"(?i)\b([a-z][a-z0-9+.\-]*://)[^\s:/@]+:[^\s/@]+@"
 )
 # Prefix-only discovery regexes. Each matches the key and its separator (the part
 # we KEEP); the value that follows is found by a linear scanner rather than by a
 # regex, so there is no backtracking-prone quantifier over uncontrolled input.
 #
 # Authorization: Bearer <tok> / Authorization: Basic "two word secret"
 # Only the "bearer" and "basic" schemes are recognised (case-insensitively).
 _AUTH_PREFIX_RE = re.compile(
    r"(?i)authorization\s*[:=]\s*(?:bearer|basic)\s+"
 )
 # Provider-prefixed env names, e.g. OPENAI_API_KEY=..., AWS_SECRET_ACCESS_KEY=...,
 # GITHUB_TOKEN=... — require a sensitive suffix preceded by `_` so benign names
 # that merely end in KEY (MONKEY, TURKEY) are left alone.
 # No (?i) flag here: only upper-case names followed by "=" are matched, with an
 # optional leading "export ".
 _ENV_PREFIX_RE = re.compile(
    r"(?:export\s+)?\b[A-Z][A-Z0-9_]*"
    r"_(?:KEY|TOKEN|SECRET|PASSWORD|PASSWD|PWD|CREDENTIALS?)\s*=\s*"
 )
 # Generic sensitive key, e.g. password=..., api_key: ..., client_secret=...
 # Case-insensitive; the key must be a whole word followed by ":" or "=".
 _KEY_PREFIX_RE = re.compile(
    r"(?i)\b(?:password|passwd|pwd|token|api[_-]?key|client_secret|secret)\b\s*[:=]\s*"
 )
 # Obvious provider-shaped bare tokens (no surrounding key needed).
 # The whole match is the token itself, bounded by \b on both sides.
 _SENSITIVE_BARE_TOKEN_RE = re.compile(
    r"\b("
    r"sk-[A-Za-z0-9_\-]{16,}"          # OpenAI / Anthropic style
    r"|gh[pousr]_[A-Za-z0-9]{20,}"     # GitHub PAT
    r"|xox[baprs]-[A-Za-z0-9\-]{10,}"  # Slack
    r"|AKIA[0-9A-Z]{16}"               # AWS access key id
    r"|hf_[A-Za-z0-9]{16,}"            # Hugging Face token
    r"|AIza[0-9A-Za-z_\-]{20,}"        # Google API key
    r")\b"
 )
 def _consume_secret_value_end(text: str, start: int) -> int:
    """Return the exclusive end index of the secret value beginning at ``start``.
    If the value is quoted, scan to the matching unescaped quote (backslash
    escapes are skipped two chars at a time). Otherwise scan to the first
    whitespace, comma, or semicolon. The scan is linear in the length of the
    input, so it cannot exhibit catastrophic backtracking.
    """
    n = len(text)
    if start >= n:
        return start
    quote = text[start]
    if quote in ("'", '"'):
        i = start + 1
        while i < n:
            ch = text[i]
            if ch == "\\":
                i += 2
                continue
            if ch == quote:
                return i + 1
            i += 1
        return n  # unterminated quote: redact to the end
    i = start
    while i < n and not text[i].isspace() and text[i] not in (",", ";"):
        i += 1
    return i
 def _redact_after_prefix(text: str, prefix_re: "re.Pattern") -> str:
    """Replace the value that follows every ``prefix_re`` match with the placeholder.
    Args:
        text: The string to scrub.
        prefix_re: Compiled pattern matching the key and separator, which are kept.
    Returns:
        ``text`` with each value found by ``_consume_secret_value_end`` swapped
        for ``_REDACTED``; prefixes followed by an empty value are left as-is.
    """
    pieces = []
    cursor = 0
    total = len(text)
    while cursor < total:
        hit = prefix_re.search(text, cursor)
        if hit is None:
            # No further prefixes: keep the remainder untouched.
            pieces.append(text[cursor:])
            break
        prefix_end = hit.end()
        pieces.append(text[cursor:prefix_end])
        secret_end = _consume_secret_value_end(text, prefix_end)
        if secret_end > prefix_end:
            pieces.append(_REDACTED)
            cursor = secret_end
            continue
        # Empty value: copy the single character after the prefix (if any) so
        # the scan always advances past this match.
        cursor = prefix_end
        if cursor < total:
            pieces.append(text[cursor])
            cursor += 1
    return "".join(pieces)
 def _redact_private_keys(text: str) -> str:
    """Replace PEM private-key blocks with a placeholder via linear scanning.
    Finds ``-----BEGIN `` markers, verifies the header names a PRIVATE KEY,
    locates the matching ``-----END `` marker, and collapses the whole block.
    No regex is used, so the (multi-line, uncontrolled) body cannot trigger
    polynomial matching.
    """
    begin_marker = "-----BEGIN "
    end_marker = "-----END "
    dash = "-----"
    max_header = 64  # generous bound on "[TYPE ]PRIVATE KEY"
    result = []
    pos = 0
    while True:
        begin = text.find(begin_marker, pos)
        if begin == -1:
            result.append(text[pos:])
            return "".join(result)
        header_start = begin + len(begin_marker)
        header_close = text.find(dash, header_start)
        if (
            header_close == -1
            or header_close - header_start > max_header
            or not text[header_start:header_close].endswith("PRIVATE KEY")
        ):
            result.append(text[pos:header_start])
            pos = header_start
            continue
        end = text.find(end_marker, header_close)
        if end == -1:
            result.append(text[pos:])
            return "".join(result)
        end_header_start = end + len(end_marker)
        end_close = text.find(dash, end_header_start)
        if (
            end_close == -1
            or end_close - end_header_start > max_header
            or not text[end_header_start:end_close].endswith("PRIVATE KEY")
        ):
            result.append(text[pos:header_start])
            pos = header_start
            continue
        result.append(text[pos:begin])
        result.append("[redacted private key]")
        pos = end_close + len(dash)
 def _redact_sensitive_text(value: object) -> str:
    """Return ``str(value)`` with obvious credential values replaced.
    ``None`` yields an empty string. Otherwise the text passes through a fixed
    sequence of scrubbers: PEM private keys, Authorization headers, cookies,
    URL credentials, upper-case env assignments, generic sensitive keys, and
    finally provider-shaped bare tokens.
    """
    if value is None:
        return ""
    # The stages run strictly in this order; each receives the previous output.
    stages = (
        _redact_private_keys,
        lambda s: _redact_after_prefix(s, _AUTH_PREFIX_RE),
        lambda s: _SENSITIVE_COOKIE_RE.sub(r"\1" + _REDACTED, s),
        lambda s: _SENSITIVE_URL_CRED_RE.sub(r"\1" + _REDACTED + "@", s),
        lambda s: _redact_after_prefix(s, _ENV_PREFIX_RE),
        lambda s: _redact_after_prefix(s, _KEY_PREFIX_RE),
        lambda s: _SENSITIVE_BARE_TOKEN_RE.sub(_REDACTED, s),
    )
    scrubbed = str(value)
    for stage in stages:
        scrubbed = stage(scrubbed)
    return scrubbed
 def _load_mcp_disabled_map() -> Dict[str, set]:
    """Load per-server disabled tool sets from the database."""
@@ -267,10 +428,6 @@ _DOMAIN_RULES = {
 - Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
 - Use `manage_contact` to list, add, update, or delete contacts in the address book.
 - Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
    "integrations": """\
 ## Integration/API rules
 - To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
 - Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
 }
 _DOMAIN_TOOL_MAP = {
@@ -281,10 +438,9 @@ _DOMAIN_TOOL_MAP = {
    "notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
    "ui": {"ui_control"},
    "sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
-    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace", "manage_bg_jobs"},
+    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
    "settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
    "contacts": {"resolve_contact", "manage_contact"},
    "integrations": {"api_call"},
 }
 def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -529,7 +685,7 @@ def get_builtin_overrides() -> dict:
        ov = get_setting("builtin_tool_overrides", {})
        return ov if isinstance(ov, dict) else {}
    except Exception as e:
-        logger.warning("Failed to load builtin tool overrides, using defaults", exc_info=e)
+        logger.warning('Failed to load builtin tool overrides: %s', e)
        return {}
@@ -541,44 +697,17 @@ def _section_text(name: str, default: str) -> str:
    return val if isinstance(val, str) and val.strip() else default
 def _compact_tool_line(name: str, section: str) -> str:
    """One-line fenced-tool usage hint for compact/local prompts."""
    text = (section or "").strip()
    if not text:
        return f"- `{name}`"
    if text.startswith("- "):
        return text
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]
    usage = []
    in_fence = False
    for ln in lines:
        if ln.startswith("```"):
            usage.append(ln)
            in_fence = not in_fence
            if len(usage) >= 3:
                break
            continue
        if in_fence and len(usage) < 3:
            usage.append(ln)
    if usage:
        return f"- `{name}` — " + " ".join(usage)
    return f"- `{name}` — " + lines[0][:160]
 def _assemble_prompt(tool_names: set, disabled_tools: set = None, compact: bool = False) -> str:
    """Build the system prompt with only the specified tools included."""
    disabled = disabled_tools or set()
    included = tool_names - disabled
    if compact:
-        tool_lines = []
+        tool_list = ", ".join(sorted(included)) if included else "none"
        for name, _default_section in TOOL_SECTIONS.items():
            if name in included:
                tool_lines.append(_compact_tool_line(name, _section_text(name, _default_section)))
        parts = [
-            _AGENT_PREAMBLE,
+            "You are an AI assistant with tool access.",
-            "## Available tools\n" + ("\n".join(tool_lines) if tool_lines else "none"),
+            f"Available tools: {tool_list}.",
-            _AGENT_RULES,
+            _API_AGENT_RULES,
        ]
        parts.extend(_domain_rules_for_tools(included))
        return "\n\n".join(parts)
@@ -644,6 +773,11 @@ _API_HOSTS = frozenset([
    "api.perplexity.ai", "api.x.ai",
    "ollama.com", "api.venice.ai", "api.kimi.com",
    "api.githubcopilot.com",
    # Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
    # Without these, `_is_api_model` falls back to keyword sniffing on the
    # model name, so well-behaved local servers don't get native tool
    # schemas and the agent silently degrades to fenced-block parsing.
    "localhost", "127.0.0.1", "host.docker.internal",
 ])
 _MCP_KEYWORDS = frozenset(["mcp", "browse", "browser", "website", "calendar", "event", "email",
                           "gmail", "screenshot", "navigate", "click", "miniflux", "rss", "feed"])
@@ -671,28 +805,6 @@ def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
    return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/"))
 def _is_local_openai_compat_url(endpoint_url: str) -> bool:
    try:
        parsed = urlparse(endpoint_url or "")
    except Exception:
        return False
    host = (parsed.hostname or "").lower()
    path = (parsed.path or "").rstrip("/")
    if not (path == "/v1" or path.startswith("/v1/")):
        return False
    if host in {"localhost", "127.0.0.1", "0.0.0.0", "host.docker.internal"}:
        return True
    if host.startswith("192.168.") or host.startswith("10."):
        return True
    if host.startswith("172."):
        try:
            second = int(host.split(".")[1])
            return 16 <= second <= 31
        except Exception:
            return False
    return False
 def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
    """Candidate ModelEndpoint.base_url keys for a runtime chat URL."""
    raw = (endpoint_url or "").strip()
@@ -756,17 +868,6 @@ def _extract_last_user_message(messages: List[Dict]) -> str:
 _LOW_SIGNAL_RE = re.compile(r"^[\W_]*$", re.UNICODE)
 # Matches a message that *opens* with a greeting, laugh, thanks or similar
 # filler word (case-insensitive, optional leading whitespace, whole word).
 # Everything after the opener is captured in the named group ``tail`` so the
 # caller can inspect what follows the greeting.
 _CASUAL_OPENING_RE = re.compile(
    r"^\s*(?:h+i+|hey+|hello+|yo+|sup+|what'?s up|wass?up|hiya|howdy|"
    r"lol|lmao|haha+|hehe+|thanks?|thank you|ty|idk|dunno|meh|bruh|bro)\b(?P<tail>.*)$",
    re.IGNORECASE,
 )
 # Task/tool vocabulary (whole words, case-insensitive). `_is_casual_low_signal`
 # searches the ``tail`` of a casual opener for these words and, on a hit,
 # refuses to treat the message as casual.
 _CASUAL_BLOCKLIST_RE = re.compile(
    r"\b(?:cookbook|serve|serving|launch|start|vllm|sglang|llama\.?cpp|ollama|"
    r"download|model|email|document|doc|note|calendar|task|search|web|research|"
    r"file|folder|repo|git|settings?|endpoint|api|token|mcp)\b",
    re.IGNORECASE,
 )
 _EXPLICIT_CONTINUATION_RE = re.compile(
    r"^\s*(?:"
    r"yes|y|yeah|yep|ok|okay|sure|do it|go ahead|continue|carry on|"
@@ -776,17 +877,6 @@ _EXPLICIT_CONTINUATION_RE = re.compile(
    r")\s*[.!?]*\s*$",
    re.IGNORECASE,
 )
 # Retry / failure-report phrasing ("try again", "it failed", "crashed", ...),
 # matched as whole words anywhere in the text, case-insensitive. Used by
 # `_is_contextual_retry_continuation` as the first gate on the latest turn.
 _RETRY_CONTINUATION_RE = re.compile(
    r"\b(?:try again|retry|again|rerun|re-run|run it again|launch it again|"
    r"start it again|failed|fails?|died|crashed|broke|insta|instantly)\b",
    re.IGNORECASE,
 )
 # Cookbook / model-serving vocabulary (whole words, case-insensitive).
 # `_is_contextual_retry_continuation` searches the recent user context for
 # these words before treating a retry phrase as a continuation.
 _COOKBOOK_CONTEXT_RE = re.compile(
    r"\b(?:cookbook|serve|serving|served|launch|start|preset|vllm|sglang|"
    r"llama\.?cpp|ollama|download|cached models?|model servers?|running models?|"
    r"gpu box|ajax|qwen|gemma|llama|mistral|minimax)\b",
    re.IGNORECASE,
 )
 def _is_explicit_continuation(text: str) -> bool:
@@ -794,37 +884,6 @@ def _is_explicit_continuation(text: str) -> bool:
    return bool(_EXPLICIT_CONTINUATION_RE.match(str(text or "").strip()))
 def _is_casual_low_signal(text: str) -> bool:
    """Report whether ``text`` is just a short greeting or bit of slang.

    Such messages should not inherit stale context. The text must open with
    a casual opener (``_CASUAL_OPENING_RE``); whatever follows the opener must
    contain no blocklisted task word (``_CASUAL_BLOCKLIST_RE``) and at most
    two word-like tokens.

    Args:
        text: The latest user message (None and other falsy values are
            treated as an empty string).

    Returns:
        True when the message is a casual low-signal opener, else False.
    """
    candidate = str(text or "").strip()
    opener = _CASUAL_OPENING_RE.match(candidate)
    if opener is None:
        return False

    remainder = opener.group("tail") or ""
    if _CASUAL_BLOCKLIST_RE.search(remainder) is not None:
        return False

    # A short vocative after the opener is fine ("hey man", "yo dude",
    # "sup <name>") and no particular address term is hardcoded. A longer
    # tail is more likely a real request and gets normal context/tooling.
    word_count = len(re.findall(r"[A-Za-z0-9_'-]+", remainder))
    return word_count <= 2
 def _is_contextual_retry_continuation(messages: List[Dict], text: str) -> bool:
    """Decide whether a "try again / it failed" turn continues active tool work.

    After a Cookbook launch the latest user turn often says only
    "try again it failed", while the model/host/command details that make it
    actionable sit one or two turns earlier. The check is deliberately
    narrow so that ordinary chat does not pick up stale Cookbook context.

    Args:
        messages: Conversation history used to gather recent user context.
        text: The latest user message.

    Returns:
        True only when ``text`` contains retry/failure phrasing AND the
        recent user context mentions Cookbook/model-serving vocabulary.
    """
    latest = str(text or "").strip()
    mentions_retry = bool(latest) and _RETRY_CONTINUATION_RE.search(latest) is not None
    if not mentions_retry:
        return False

    # Scan a slightly wider window than the helper's defaults so details
    # from a couple of turns back are still visible.
    window = _recent_context_for_retrieval(messages, max_user=5, max_chars=1200)
    return _COOKBOOK_CONTEXT_RE.search(window) is not None
 def _assistant_requested_followup(messages: List[Dict]) -> bool:
    """True when the previous assistant turn asked for missing task details.
@@ -866,12 +925,11 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
    which domain rule packs get appended to the system prompt.
    """
    text = str(last_user or "").strip()
-    retry_continuation = _is_contextual_retry_continuation(messages, text)
+    continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages)
    continuation = _is_explicit_continuation(text) or _assistant_requested_followup(messages) or retry_continuation
    retrieval_query = _recent_context_for_retrieval(messages) if continuation else text
    q = retrieval_query.lower()
-    if not text or bool(_LOW_SIGNAL_RE.match(text)) or _is_casual_low_signal(text):
+    if not text or bool(_LOW_SIGNAL_RE.match(text)):
        return {
            "low_signal": True,
            "continuation": False,
@@ -914,25 +972,10 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
        domains.add("sessions")
    if has(r"\b(file|folder|directory|repo|git|grep|find in files|read file|edit file|shell|terminal|bash|python)\b"):
        domains.add("files")
    # Managing detached bash jobs: "kill the background job", "stop the job",
    # "kill that job", "check the job output", "is the bg job done".
    if (has(r"\b(background|bg)\s+(jobs?|task)\b")
            or has(r"\b(kill|stop|cancel|terminate|check|tail|show|list)\b.{0,16}\bjobs?\b")
            or has(r"\bjobs?\b.{0,16}\b(output|status|done|finished|running)\b")):
        domains.add("files")
    if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
        domains.add("settings")
    if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
        domains.add("contacts")
    # API-integration intent — calling a configured service via the api_call
    # tool. Without this the #3794 repro ("Use the api_call tool to call Home
    # Assistant GET /api/states") matched no domain, classified as low-signal,
    # and the tool never reached the schema filter. Detect it explicitly so the
    # "integrations" domain seeds api_call deterministically (see
    # _DOMAIN_TOOL_MAP), independent of embedding retrieval.
    if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
           r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
        domains.add("integrations")
    low_signal = not continuation and not domains
    return {
@@ -961,11 +1004,8 @@ def _recent_context_for_retrieval(messages: List[Dict], max_user: int = 3, max_c
        if isinstance(content, list):
            content = " ".join(b.get("text", "") for b in content if isinstance(b, dict))
        content = (content or "").strip()
-        # Skip injected envelopes — role=user but not human intent. Tool results
+        # Skip injected tool-result envelopes — role=user but not human intent.
-        # are now wrapped via untrusted_context_message (metadata.trusted=False);
+        if not content or content.startswith("[Tool execution results]"):
        # keep the legacy "[Tool execution results]" prefix for older histories.
        meta = msg.get("metadata") or {}
        if not content or meta.get("trusted") is False or content.startswith("[Tool execution results]"):
            continue
        collected.append(content)
        if len(collected) >= max_user:
@@ -984,7 +1024,6 @@ def _build_system_prompt(
    compact: bool = False,
    owner: Optional[str] = None,
    suppress_local_context: bool = False,
    suppress_skills: bool = False,
    active_email: Optional[Dict[str, str]] = None,
 ) -> List[Dict]:
    """Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
@@ -1002,7 +1041,7 @@ def _build_system_prompt(
        _ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
    except Exception:
        _ov_sig = ""
-    cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context, suppress_skills)
+    cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
    if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
        agent_prompt = _cached_base_prompt
        # Skill index is user-editable (name + description), so it must never
@@ -1012,7 +1051,6 @@ def _build_system_prompt(
            disabled_tools, mcp_mgr, needs_admin, relevant_tools,
            mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
            suppress_local_context=suppress_local_context,
            suppress_skills=suppress_skills,
        )
    else:
        agent_prompt, _skill_index_block = _build_base_prompt(
@@ -1024,7 +1062,6 @@ def _build_system_prompt(
            compact=compact,
            owner=owner,
            suppress_local_context=suppress_local_context,
            suppress_skills=suppress_skills,
        )
        if not active_document:
            _cached_base_prompt = agent_prompt
@@ -1053,8 +1090,8 @@ def _build_system_prompt(
    try:
        from src.user_time import current_datetime_context_message
        _datetime_message = current_datetime_context_message()
-    except Exception as e:
+    except Exception:
-        logger.warning("Failed to build datetime context message", exc_info=e)
+        pass
    # Document context is kept as a SEPARATE message (not merged into the tool
    # prompt) so the context trimmer doesn't destroy it when truncating the
@@ -1097,8 +1134,8 @@ def _build_system_prompt(
            try:
                from src.pdf_form_doc import find_source_upload_id
                _is_form_backed = bool(find_source_upload_id(active_document.current_content or ""))
-            except Exception as e:
+            except Exception:
-                logger.warning("Failed to detect if document is form-backed, assuming plain", exc_info=e)
+                pass
            if _is_form_backed:
                doc_ctx = (
@@ -1308,7 +1345,7 @@ def _build_system_prompt(
    # few. If the teacher wrote a procedure for "open my X chat" last
    # time the student failed, this is where the student finds it
    # before deciding which tool to call.
-    if not suppress_local_context and not suppress_skills:
+    if not suppress_local_context:
        try:
            last_user = _extract_last_user_message(messages)
            # Respect the user's skills-enabled toggle (mirrors memory_enabled).
@@ -1475,7 +1512,6 @@ def _build_base_prompt(
    compact: bool = False,
    owner: Optional[str] = None,
    suppress_local_context: bool = False,
    suppress_skills: bool = False,
 ):
    """Build the agent prompt with only relevant tools included.
@@ -1528,7 +1564,7 @@ def _build_base_prompt(
    # The caller wraps it in untrusted_context_message and ships it as a
    # user-role message — same treatment as the matched-skills block.
    skill_index_block = ""
-    if not suppress_local_context and not suppress_skills:
+    if not suppress_local_context:
        try:
            from services.memory.skills import SkillsManager
            from src.constants import DATA_DIR
@@ -1687,14 +1723,8 @@ def _append_tool_results(
        if round_reasoning:
            msg["reasoning_content"] = round_reasoning
        messages.append(msg)
        # Tool output (shell/python stdout, file reads, fetched pages, email
        # bodies, MCP results) is sourced from outside the server. Wrap it as
        # untrusted data so prompt-injection inside a tool result is treated as
        # data, not instructions — same hardening as skills (#788) and the
        # web/RAG context. THREAT_MODEL.md lists tool output as a surface that
        # must go through untrusted_context_message.
        messages.append(
-            untrusted_context_message("tool execution results", tool_output_text)
+            {"role": "user", "content": f"[Tool execution results]\n\n{tool_output_text}"}
        )
@@ -1953,7 +1983,6 @@ async def stream_agent_loop(
    approved_plan: Optional[str] = None,
    tool_policy: Optional[ToolPolicy] = None,
    workspace: Optional[str] = None,
    forced_tools: Optional[Set[str]] = None,
    _is_teacher_run: bool = False,
 ) -> AsyncGenerator[str, None]:
    """Streaming agent loop generator.
@@ -1993,20 +2022,6 @@ async def stream_agent_loop(
    _needs_admin = _detect_admin_intent(messages)
    _last_user = _extract_last_user_message(messages)
    _intent = _classify_agent_request(messages, _last_user)
    _low_signal_turn = bool(_intent.get("low_signal"))
    _casual_low_signal_turn = _is_casual_low_signal(_last_user)
    _direct_low_signal = (
        _low_signal_turn
        and not bool(_intent.get("continuation"))
        and not plan_mode
        and not approved_plan
        and not guide_only
        and (_casual_low_signal_turn or active_document is None)
        and (_casual_low_signal_turn or not active_email)
        and (_casual_low_signal_turn or not workspace)
        and not forced_tools
        and not relevant_tools
    )
    # Tool retrieval uses the latest message by default. It may inherit recent
    # user turns only for explicit continuations ("yes", "do it", "1").
    _retrieval_query = str(_intent.get("retrieval_query") or _last_user)
@@ -2014,86 +2029,11 @@ async def stream_agent_loop(
        "[agent-intent] latest=%r continuation=%s low_signal=%s domains=%s retrieval_query=%r",
        _last_user[:120],
        bool(_intent.get("continuation")),
-        _low_signal_turn,
+        bool(_intent.get("low_signal")),
        sorted(_intent.get("domains") or []),
        _retrieval_query[:200],
    )
    _mcp_disabled_map = _load_mcp_disabled_map() if mcp_mgr else {}
    if _direct_low_signal:
        logger.info("[agent] direct low-signal reply path for latest=%r", _last_user[:80])
        direct_messages = [{"role": "user", "content": _last_user}]
        direct_response = ""
        direct_start = time.time()
        direct_actual_model = model
        real_input_tokens = 0
        real_output_tokens = 0
        try:
            async for chunk in stream_llm_with_fallback(
                [(endpoint_url, model, headers)] + list(fallbacks or []),
                direct_messages,
                temperature=temperature,
                max_tokens=min(max_tokens or 128, 128),
                prompt_type=None,
                tools=None,
                timeout=int(get_setting("agent_stream_timeout_seconds", 300) or 300),
                session_id=session_id,
            ):
                if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                    try:
                        data = json.loads(chunk[6:])
                    except json.JSONDecodeError:
                        yield chunk
                        continue
                    if data.get("type") == "usage":
                        usage = data.get("data", {}) or {}
                        direct_actual_model = usage.get("model") or direct_actual_model
                        real_input_tokens += usage.get("input_tokens", 0) or 0
                        real_output_tokens += usage.get("output_tokens", 0) or 0
                        continue
                    if data.get("type") == "model_actual":
                        direct_actual_model = data.get("model") or direct_actual_model
                        data["requested_model"] = model
                        yield f"data: {json.dumps(data)}\n\n"
                        continue
                    if data.get("type") == "fallback":
                        direct_actual_model = data.get("answered_by") or direct_actual_model
                        yield chunk
                        continue
                    if "delta" in data:
                        if not data.get("thinking"):
                            direct_response += data.get("delta", "")
                        yield chunk
                        continue
                    yield chunk
                elif chunk.startswith("event: "):
                    yield chunk
        except Exception as _direct_err:
            logger.warning("[agent] direct low-signal path failed: %s", _direct_err)
            fallback = "Hey."
            direct_response += fallback
            yield f"data: {json.dumps({'delta': fallback})}\n\n"
        if not direct_response.strip():
            fallback = "Hey."
            direct_response = fallback
            yield f"data: {json.dumps({'delta': fallback})}\n\n"
        duration = time.time() - direct_start
        metrics = {
            "model": direct_actual_model,
            "requested_model": model,
            "input_tokens": real_input_tokens or estimate_tokens(direct_messages),
            "output_tokens": real_output_tokens or max(len(direct_response) // 4, 1),
            "total_time": round(duration, 2),
            "response_time": round(duration, 2),
            "agent_rounds": 0,
            "tool_calls": 0,
            "direct_low_signal": True,
        }
        yield f"data: {json.dumps({'type': 'metrics', 'data': metrics})}\n\n"
        yield "data: [DONE]\n\n"
        return
    if plan_mode and mcp_mgr:
        # Allow read-only MCP tools to investigate, block write/unknown ones:
        # hide them from the schemas AND reject them at runtime by qualified name.
@@ -2105,11 +2045,11 @@ async def stream_agent_loop(
    # RAG-based tool selection: retrieve relevant tools for this query.
    # If caller provided a pre-computed set (e.g. task_scheduler), use that.
-    _relevant_tools = relevant_tools
+    _relevant_tools = set() if guide_only else relevant_tools
    _t1 = time.time()
    if _relevant_tools:
        logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
-    if not guide_only and not _relevant_tools and _low_signal_turn:
+    if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
        from src.tool_index import ALWAYS_AVAILABLE
        if workspace:
            # An active workspace IS the file-work signal: a vague "look at the
@@ -2200,15 +2140,6 @@ async def stream_agent_loop(
    if _relevant_tools is not None and active_document is not None:
        _relevant_tools.update({"edit_document", "update_document", "suggest_document"})
    # Per-request UI toggles are stronger than retrieval. If the user turns on
    # Search, the model must see the search tools even when the latest text is a
    # typo or otherwise low-signal for tool RAG.
    if not guide_only and forced_tools:
        if _relevant_tools is None:
            from src.tool_index import ALWAYS_AVAILABLE
            _relevant_tools = set(ALWAYS_AVAILABLE)
        _relevant_tools.update(t for t in forced_tools if t not in disabled_tools)
    # The skill index injected by _build_system_prompt tells the model to
    # call `manage_skills action=view`, and Jaccard-matched skills are pasted
    # into the prompt as procedures to follow — but neither path goes through
@@ -2216,7 +2147,7 @@ async def stream_agent_loop(
    # (grep, read_file, ...) that aren't in its schema list. Keep the schemas
    # in lockstep: manage_skills is callable whenever any skill is indexed,
    # and a matched skill's declared requires_toolsets ride along with it.
-    if not guide_only and _relevant_tools is not None and not _low_signal_turn:
+    if not guide_only and _relevant_tools is not None:
        try:
            from services.memory.skills import SkillsManager
            from src.constants import DATA_DIR
@@ -2281,7 +2212,7 @@ async def stream_agent_loop(
    _model_supports_tools = any(kw in _model_lc for kw in (
        "gpt-4", "gpt-5", "gpt-o", "claude", "gemini", "gemma",
        "qwen3", "qwen2.5", "mixtral", "mistral", "llama-3.1", "llama-3.2",
-        "llama-3.3", "llama-4", "llama3.1", "llama3.2", "llama3.3", "llama4",
+        "llama-3.3", "llama-4",
        # Local-served models that follow OpenAI-style function calling
        # via vLLM's `--enable-auto-tool-choice`. Belt-and-suspenders
        # with the per-endpoint flag above.
@@ -2323,15 +2254,13 @@ async def stream_agent_loop(
        _is_api_model = False
    else:
        _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
    _compact_agent_prompt = _is_api_model or _is_ollama_native or _ollama_openai_compat
    messages, mcp_schemas = _build_system_prompt(
        messages, model, active_document, mcp_mgr, disabled_tools,
        needs_admin=_needs_admin, relevant_tools=_relevant_tools,
        mcp_disabled_map=_mcp_disabled_map,
-        compact=_compact_agent_prompt,
+        compact=_is_api_model,
        owner=owner,
        suppress_local_context=guide_only,
        suppress_skills=_low_signal_turn,
        active_email=active_email,
    )
    if plan_mode and not guide_only:
@@ -2417,14 +2346,6 @@ async def stream_agent_loop(
    # Strip internal metadata keys before sending to the LLM API
    messages = [{k: v for k, v in msg.items() if k != "_protected"} for msg in messages]
    agent_prompt_tokens = estimate_tokens(messages)
    logger.info(
        "[agent-timing] prep_done model=%s prompt_tokens=%s context_length=%s prep=%s",
        model,
        agent_prompt_tokens,
        context_length,
        {k: round(v, 3) for k, v in prep_timings.items()},
    )
    yield f"data: {json.dumps({'type': 'agent_prep', 'data': {k: round(v, 3) for k, v in prep_timings.items()}})}\n\n"
    full_response = ""
@@ -2455,6 +2376,7 @@ async def stream_agent_loop(
    # signatures + consecutive no-text tool rounds to bail early.
    _recent_call_sigs = collections.deque(maxlen=6)
    _stuck_rounds = 0
    _MAX_STUCK_ROUNDS = 4  # consecutive no-progress rounds before loop-breaker bails
    # Frequency of each exact call signature (tool + args), for the runaway
    # backstop. Counting identical repeats — not distinct same-tool calls —
    # lets a legit batch (e.g. 18 calendar events at once) through.
@@ -2569,19 +2491,6 @@ async def stream_agent_loop(
        # complementary cap for the rare stream that trickles bytes forever and
        # so never trips the inactivity timeout. Generous — only catches runaway.
        _round_deadline = time.time() + max(agent_stream_timeout * 4, 1200)
        _round_start = time.time()
        _round_first_event_logged = False
        _round_first_token_logged = False
        logger.info(
            "[agent-timing] round_start round=%s model=%s endpoint=%s prompt_tokens=%s tools=%s native_tools=%s timeout=%s",
            round_num,
            model,
            endpoint_url,
            estimate_tokens(messages),
            len(_tool_names_sent),
            bool(all_tool_schemas),
            agent_stream_timeout,
        )
        async for chunk in stream_llm_with_fallback(
            _candidates,
            messages,
@@ -2592,30 +2501,11 @@ async def stream_agent_loop(
            timeout=agent_stream_timeout,
            session_id=session_id,
        ):
            if not _round_first_event_logged:
                _round_first_event_logged = True
                logger.info(
                    "[agent-timing] first_event round=%s elapsed=%.3fs kind=%s",
                    round_num,
                    time.time() - _round_start,
                    "error" if chunk.startswith("event: error") else "data",
                )
            if time.time() > _round_deadline:
-                logger.warning(
+                logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
                    "[agent-timing] round_deadline round=%s elapsed=%.3fs deadline_s=%s",
                    round_num,
                    time.time() - _round_start,
                    max(agent_stream_timeout * 4, 1200),
                )
                break
            # Forward error events from stream_llm to the frontend
            if chunk.startswith("event: error"):
                logger.warning(
                    "[agent-timing] stream_error round=%s elapsed=%.3fs chunk=%r",
                    round_num,
                    time.time() - _round_start,
                    chunk[:500],
                )
                yield chunk
                continue
            if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
@@ -2695,15 +2585,6 @@ async def stream_agent_loop(
                        if not first_token_received:
                            time_to_first_token = time.time() - total_start
                            first_token_received = True
                        if not _round_first_token_logged:
                            _round_first_token_logged = True
                            logger.info(
                                "[agent-timing] first_visible_token round=%s elapsed=%.3fs total_elapsed=%.3fs thinking=%s",
                                round_num,
                                time.time() - _round_start,
                                time.time() - total_start,
                                bool(data.get("thinking")),
                            )
                        # Keep reasoning deltas in a separate accumulator so
                        # we can echo them back via `reasoning_content` on the
                        # next request (DeepSeek requires this; harmless for
@@ -2773,21 +2654,7 @@ async def stream_agent_loop(
                yield chunk
            # Intercept [DONE] — don't forward until all rounds finish
-        logger.info(
+        tool_blocks, used_native = _resolve_tool_blocks(round_response, native_tool_calls, round_num, is_api_model=_is_api_model)
            "[agent-timing] round_stream_done round=%s elapsed=%.3fs text_chars=%s tool_calls=%s first_event=%s first_token=%s",
            round_num,
            time.time() - _round_start,
            len(round_response),
            len(native_tool_calls),
            _round_first_event_logged,
            _round_first_token_logged,
        )
        tool_blocks, used_native = _resolve_tool_blocks(
            round_response,
            native_tool_calls,
            round_num,
            is_api_model=(_is_api_model and not guide_only),
        )
        # Force-answer round: we told the model to STOP calling tools and
        # answer. If it ignored that and emitted a (possibly DSML) tool
@@ -2871,7 +2738,7 @@ async def stream_agent_loop(
        # model with no real native_tool_calls) must not be stripped from the
        # persisted text either — otherwise it streams once and then disappears
        # on reload (#3222 follow-up).
-        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native and not guide_only)).strip()
+        cleaned_round = strip_tool_blocks(round_response, skip_fenced=(_is_api_model and not used_native)).strip()
        round_texts.append(cleaned_round)
        if not tool_blocks:
@@ -2932,25 +2799,21 @@ async def stream_agent_loop(
            # promise: short response (<400 chars), no fenced code/answer,
            # and an action-intent phrase was matched. Long answers that
            # happen to contain "let me know" are not stalls.
-            _looks_like_promise = (
+            _promise_shape = (
                not guide_only
                and _intent_match is not None
                and len(_intent_text) < 400
                and "```" not in _intent_text
                and _intent_nudge_count < _MAX_INTENT_NUDGES
            )
            _looks_like_promise = _promise_shape and _intent_nudge_count < _MAX_INTENT_NUDGES
            if _looks_like_promise:
                _intent_nudge_count += 1
                _matched_phrase = _intent_match.group(0).strip()
-                logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
+                # Don't log the matched phrase — it's raw model text that may
-                _lower_phrase = _matched_phrase.lower()
+                # carry credentials. Structural metadata only.
-                _cookbook_log_hint = ""
+                logger.info(
-                if any(_word in _lower_phrase for _word in ("log", "logs", "output", "tail", "status")):
+                    "[agent] intent-without-action nudge #%d on round %d",
-                    _cookbook_log_hint = (
+                    _intent_nudge_count, round_num,
                        " If this is about a Cookbook/model serve, the concrete calls are: "
                        "`list_served_models` first, then `tail_serve_output` with the "
                        "session_id from the serve/list result. Never answer with "
                        "\"check logs\" when those tools are available."
                )
                messages.append({
                    "role": "system",
@@ -2960,7 +2823,6 @@ async def stream_agent_loop(
                        "see you announced the action but didn't run it, which "
                        "is the most frustrating thing you can do. "
                        "DO IT NOW: emit the actual function call this turn. "
                        f"{_cookbook_log_hint}"
                        "If you decided not to do it after all, say so plainly in "
                        "one sentence instead of restating the plan."
                    ),
@@ -2968,6 +2830,24 @@ async def stream_agent_loop(
                # Visible signal in the stream so the user knows we caught it.
                yield f'data: {json.dumps({"type": "agent_step", "round": round_num + 1})}\n\n'
                continue
            # The model keeps announcing actions it never takes and we've spent
            # every nudge — surface why the turn is ending instead of letting it
            # look like a clean completion.
            if _promise_shape and _intent_nudge_count >= _MAX_INTENT_NUDGES:
                _matched_phrase = _intent_match.group(0).strip()
                _matched_phrase_safe = _redact_sensitive_text(_matched_phrase)
                _in_message = (
                    f"Intent-nudge cap reached on round {round_num}: the model "
                    f"announced an action ({_matched_phrase_safe!r}) without a tool call "
                    f"after {_intent_nudge_count} nudge(s); ending the turn."
                )
                # Do not log the matched phrase, even redacted. It is raw model
                # text and may contain credentials; keep logs structural only.
                logger.warning(
                    "[agent] intent-nudge cap exhausted on round %d (%d/%d)",
                    round_num, _intent_nudge_count, _MAX_INTENT_NUDGES,
                )
                yield f'data: {json.dumps({"type": "intent_nudge_exhausted", "round": round_num, "nudges": _intent_nudge_count, "max_nudges": _MAX_INTENT_NUDGES, "message": _in_message})}\n\n'
            break  # no tools — done
        # ── Loop-breaker (Terminus-style stall detector) ──────────────
@@ -3000,10 +2880,23 @@ async def stream_agent_loop(
        # Distinct calls to one tool (a real batch) are legitimate work, so we
        # count identical call signatures, not raw per-tool-type totals.
        _runaway = _detect_runaway_call(_call_freq)
-        if _stuck_rounds >= 4 or _runaway:
+        if _stuck_rounds >= _MAX_STUCK_ROUNDS or _runaway:
            reason = (f"calling {_runaway} with identical arguments over and over" if _runaway
                      else "repeating the same tool calls without new progress")
-            logger.warning(f"[agent] loop-breaker tripped on round {round_num} ({reason}); sig={_sig[:80]!r}")
+            _lb_message = (
                f"Loop-breaker stopped the agent on round {round_num}: {reason}. "
                "Forced one tool-free round to converge on an answer or state what's blocked."
            )
            # Log structural metadata only — `_sig` is raw tool-call content
            # that may carry credentials.
            logger.warning(
                "[agent] loop-breaker tripped on round %d (%s); "
                "stuck_rounds=%d/%d runaway=%r",
                round_num, reason, _stuck_rounds, _MAX_STUCK_ROUNDS, _runaway,
            )
            # Surface the stop cause to the stream so the user (and journalctl)
            # can tell a guard fired, not a clean completion.
            yield f'data: {json.dumps({"type": "loop_breaker_triggered", "round": round_num, "reason": reason, "stuck_rounds": _stuck_rounds, "max_stuck_rounds": _MAX_STUCK_ROUNDS, "runaway": _runaway, "message": _lb_message})}\n\n'
            # The model has been executing tools, so its results are already
            # in context. Force ONE tool-free round to converge: write the
            # answer from what it has, or state plainly what's blocking it.
@@ -3082,6 +2975,10 @@ async def stream_agent_loop(
                cmd_display = block.content.split("\n")[0].strip()[:80]
            else:
                cmd_display = block.content.strip()
            # The display string is streamed (tool_start/tool_output) and persisted;
            # redact any secrets in it. block.content itself is left untouched so
            # tool execution still sees the real command.
            cmd_display = _redact_sensitive_text(cmd_display)
            if tool_policy and tool_policy.blocks(block.tool_type):
                desc = f"{block.tool_type}: BLOCKED"
@@ -3127,8 +3024,15 @@ async def stream_agent_loop(
                    evt = await _progress_q.get()
                    if evt is None:
                        break
                    # Redact secrets in the live tail before streaming — the
                    # final tool_output is redacted, so the progress tail must
                    # be too, or a secret could flash by mid-run. Copy so we
                    # don't mutate the tool's own event payload.
                    _evt = dict(evt)
                    if isinstance(_evt.get("tail"), str):
                        _evt["tail"] = _redact_sensitive_text(_evt["tail"])
                    yield (
-                        f'data: {json.dumps({"type": "tool_progress", "tool": block.tool_type, "round": round_num, **evt})}\n\n'
+                        f'data: {json.dumps({"type": "tool_progress", "tool": block.tool_type, "round": round_num, **_evt})}\n\n'
                    )
                desc, result = await _tool_task
@@ -3194,7 +3098,7 @@ async def stream_agent_loop(
                                result["results"] = _clean
                            elif "stdout" in result:
                                result["stdout"] = _clean
-                        except (json.JSONDecodeError, Exception):
+                        except Exception:
                            pass
            # Emit doc-specific event for document tools — the frontend
@@ -3263,29 +3167,29 @@ async def stream_agent_loop(
                # empty) stdout/stderr; fall back to the error so the "timed
                # out" reason reaches the UI instead of a blank result.
                raw = result["stdout"] or result["stderr"] or result.get("error", "")
-                output_text = _truncate(raw)
+                output_text = _truncate(_redact_sensitive_text(raw))
            elif "output" in result:
                # bash / python canonical result: {"output": ..., "exit_code": ...}
                raw = result["output"] or ""
-                output_text = _truncate(raw)
+                output_text = _truncate(_redact_sensitive_text(raw))
            elif "response" in result:
                # AI interaction tools (chat_with_model, send_to_session)
                label = result.get("model", result.get("session_name", "AI"))
-                output_text = _truncate(f"{label}: {result['response']}")
+                output_text = _truncate(_redact_sensitive_text(f"{label}: {result['response']}"))
            elif "content" in result:
-                output_text = _truncate(result["content"])
+                output_text = _truncate(_redact_sensitive_text(result["content"]))
            elif "results" in result:
-                output_text = _truncate(result["results"])
+                output_text = _truncate(_redact_sensitive_text(result["results"]))
            elif "session_id" in result and "name" in result:
                output_text = f"Session created: {result['name']} (id: {result['session_id']})"
            elif "success" in result:
                output_text = (
                    f"Written: {result.get('path', '')}"
                    if result["success"]
-                    else f"Error: {result.get('error', '')}"
+                    else f"Error: {_redact_sensitive_text(result.get('error', ''))}"
                )
            elif "error" in result:
-                output_text = _truncate(result["error"])
+                output_text = _truncate(_redact_sensitive_text(result["error"]))
            # Emit tool_output (include ui_event data if present)
            tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
@@ -174,20 +174,8 @@ async def subscribe(session_id: str) -> AsyncGenerator[str, None]:
            next_seq += 1
        if run.status != "running":
            return
        heartbeat_idx = 0
        while True:
-            try:
+            seq, ev = await q.get()
                seq, ev = await asyncio.wait_for(q.get(), timeout=10.0)
            except asyncio.TimeoutError:
                # Keep slow local models/proxies alive while they prefill before
                # the first token. SSE comments are ignored by the UI but reset
                # browser/proxy idle timers, which prevents "empty response"
                # disconnects on llama.cpp first-token latencies of 30s+.
                if run.status == "running":
                    heartbeat_idx += 1
                    yield f": heartbeat {heartbeat_idx}\n\n"
                    continue
                seq, ev = (None, None)
            if seq is None:            # end sentinel
                while next_seq < len(run.buffer):   # flush any tail the sentinel raced
                    yield run.buffer[next_seq]
@@ -22,9 +22,6 @@ from .subprocess_tools import BashTool, PythonTool
 from .web_tools import WebSearchTool, WebFetchTool
 from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
 from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
 from .model_interaction_tools import ChatWithModelTool, AskTeacherTool, ListModelsTool
 from .bg_job_tools import ManageBgJobsTool
 from .session_tools import CreateSessionTool, ListSessionsTool, SendToSessionTool, ManageSessionTool
 TOOL_HANDLERS = {
    "bash": BashTool().execute,
@@ -43,14 +40,6 @@ TOOL_HANDLERS = {
    "suggest_document": SuggestDocumentTool().execute,
    "manage_documents": ManageDocumentTool().execute,
    "get_workspace": GetWorkspaceTool().execute,
    "chat_with_model": ChatWithModelTool().execute,
    "ask_teacher": AskTeacherTool().execute,
    "list_models": ListModelsTool().execute,
    "manage_bg_jobs": ManageBgJobsTool().execute,
    "create_session": CreateSessionTool().execute,
    "list_sessions": ListSessionsTool().execute,
    "send_to_session": SendToSessionTool().execute,
    "manage_session": ManageSessionTool().execute,
 }
 # ---------------------------------------------------------------------------
@@ -63,7 +52,7 @@ PYTHON_TIMEOUT = 30
 # Tool types that trigger execution
 TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
-             "grep", "glob", "ls", "get_workspace", "manage_bg_jobs",
+             "grep", "glob", "ls", "get_workspace",
             "create_document", "update_document", "edit_document",
             "search_chats",
             "chat_with_model", "create_session", "list_sessions",
@@ -1,98 +0,0 @@
 """Agent tool to inspect and control detached background `bash` jobs.
 `bash` blocks prefixed with a `#!bg` marker run detached via `src.bg_jobs`; the
 agent is auto-re-invoked with the output when they finish. This tool covers the
 gaps in that flow: list the jobs in the current chat, read a still-running job's
 output on demand, and kill a runaway job instead of waiting out its max-runtime.
 Registry tool (`TOOL_HANDLERS["manage_bg_jobs"]`). Jobs are scoped to the chat
 that launched them, so every action requires the caller's `session_id` and a job
 from another session is treated as not found.
 """
 import json
 import time
 from typing import Any, Dict, List
 _LIST_ACTIONS = {"list", "ls", "jobs"}
 _OUTPUT_ACTIONS = {"output", "get", "read", "tail", "status", "show"}
 _KILL_ACTIONS = {"kill", "stop", "cancel", "terminate"}
 def _age(rec: Dict[str, Any]) -> str:
    start = rec.get("started_at")
    if not start:
        return "?"
    secs = int(time.time() - start)
    if secs < 60:
        return f"{secs}s"
    if secs < 3600:
        return f"{secs // 60}m"
    return f"{secs // 3600}h{(secs % 3600) // 60}m"
 def _status_label(rec: Dict[str, Any]) -> str:
    status = rec.get("status", "?")
    if rec.get("killed"):
        return "killed"
    if rec.get("timed_out"):
        return "timed out"
    if rec.get("died"):
        return "died"
    if status in ("done", "failed"):
        return f"{status} (exit {rec.get('exit_code')})"
    return status
 def _row(rec: Dict[str, Any]) -> str:
    """Format one job record as a single summary line for the job listing.

    The line has the shape ``[<id>] <status> | <age> | <command>``, where the
    command is the first line of ``rec["command"]`` capped at 80 characters.
    """
    # A missing or whitespace-only command has no first line: splitlines()
    # returns [] for "", so indexing [0] unconditionally would raise
    # IndexError and break the whole listing. Show an empty command instead.
    command_lines = (rec.get("command") or "").strip().splitlines()
    cmd = command_lines[0][:80] if command_lines else ""
    return f"[{rec.get('id')}] {_status_label(rec)} | {_age(rec)} | {cmd}"
 class ManageBgJobsTool:
    """Tool handler that lists, inspects and kills background jobs of a chat.

    ``content`` is a JSON object with an ``action`` (default ``"list"``) and,
    for output/kill actions, a ``job_id`` (``id`` is accepted as an alias).
    Unparseable or non-object content is treated as an empty argument set.
    """

    async def execute(self, content: str, ctx: dict) -> dict:
        """Run one manage_bg_jobs action for the chat identified by ``ctx``.

        Args:
            content: JSON-encoded arguments (``action``, ``job_id`` / ``id``).
            ctx: Call context; ``ctx["session_id"]`` scopes every action.

        Returns:
            ``{"output": ..., "exit_code": 0}`` on success, or
            ``{"error": ..., "exit_code": 1}`` on failure.
        """
        from src import bg_jobs
        session_id = ctx.get("session_id")
        raw = (content or "").strip()
        try:
            args = json.loads(raw) if raw else {}
        except (ValueError, TypeError):
            args = {}
        if not isinstance(args, dict):
            args = {}
        action = str(args.get("action", "list")).strip().lower()
        job_id = str(args.get("job_id") or args.get("id") or "").strip()
        if not session_id:
            return {"error": "manage_bg_jobs: no active chat session; background jobs are scoped to a chat.", "exit_code": 1}
        if action in _LIST_ACTIONS:
            jobs: List[Dict[str, Any]] = bg_jobs.list_for_session(session_id)
            if not jobs:
                return {"output": "No background jobs in this chat.", "exit_code": 0}
            # Newest first; records without a start time sort last.
            jobs.sort(key=lambda r: r.get("started_at") or 0, reverse=True)
            lines = "\n".join(_row(r) for r in jobs)
            return {"output": f"{len(jobs)} background job(s):\n{lines}", "exit_code": 0}
        if action in _OUTPUT_ACTIONS or action in _KILL_ACTIONS:
            if not job_id:
                return {"error": f"manage_bg_jobs: action '{action}' requires a job_id (see action='list').", "exit_code": 1}
            rec = bg_jobs.get(job_id)
            # Scope: only the chat that launched a job may see or control it.
            if rec is None or rec.get("session_id") != session_id:
                return {"error": f"manage_bg_jobs: no background job '{job_id}' in this chat.", "exit_code": 1}
            if action in _KILL_ACTIONS:
                if rec.get("status") != "running":
                    return {"output": f"Job `{job_id}` already {_status_label(rec)}; nothing to kill.", "exit_code": 0}
                killed = bg_jobs.kill(job_id)
                # The job is already killed at this point, so the confirmation
                # must not crash: a None/empty command has no first line
                # ("".splitlines() == []), which previously raised here.
                killed_lines = ((killed or {}).get("command") or "").splitlines()
                killed_cmd = killed_lines[0][:80] if killed_lines else ""
                return {"output": f"Killed background job `{job_id}` ({killed_cmd}).", "exit_code": 0}
            out = rec.get("output") or "(no output yet)"
            return {
                "output": f"Job `{job_id}` [{_status_label(rec)}, {_age(rec)}]\nCommand: {rec.get('command')}\n\nOutput:\n{out}",
                "exit_code": 0,
            }
        return {"error": f"manage_bg_jobs: unknown action '{action}'. Use list, output, or kill.", "exit_code": 1}
@@ -1,7 +1,6 @@
 import asyncio
 import json
 import os
 import re
 import difflib
 import fnmatch
 import shutil
@@ -17,31 +16,6 @@ _CODENAV_SKIP_DIRS = frozenset({
 _CODENAV_MAX_HITS = 200
 _CODENAV_MAX_LINE = 400
 def _glob_to_regex(pat: str) -> "re.Pattern":
    """Translate a forward-slash glob (**, *, ?) into a compiled regex.
    `**/` matches zero or more complete directories.
    `*` matches within a single path segment (does not cross /).
    """
    i, n, out = 0, len(pat), []
    while i < n:
        if pat[i : i + 3] == "**/":
            out.append("(?:[^/]+/)*")
            i += 3
        elif pat[i : i + 2] == "**":
            out.append(".*")
            i += 2
        elif pat[i] == "*":
            out.append("[^/]*")
            i += 1
        elif pat[i] == "?":
            out.append("[^/]")
            i += 1
        else:
            out.append(re.escape(pat[i]))
            i += 1
    return re.compile("".join(out))
 def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
    if old == new:
        return None
@@ -285,38 +259,23 @@ class GlobTool:
            return {"error": f"glob: {e}", "exit_code": 1}
        def _glob():
-            base = os.path.abspath(root)
+            from pathlib import Path
-            if not os.path.isdir(base):
+            base = Path(root)
            if not base.is_dir():
                return None, f"glob: {root}: not a directory"
            norm_pat = pattern.replace("\\", "/")
            # Fast path: literal pattern (no wildcards) → direct path lookup.
            if not any(c in norm_pat for c in "*?["):
                cand = os.path.normpath(os.path.join(base, norm_pat))
                if os.path.exists(cand):
                    return [cand], None
                # Literal not at exact path — fall through to walk so
                # e.g. "foo.py" still matches at any depth (like rglob).
            # Compile glob to regex: * stays within one segment, **/ spans dirs.
            regex = _glob_to_regex(norm_pat)
            matched = []
            cap = _CODENAV_MAX_HITS * 5
            try:
-                for dp, dns, fns in os.walk(base):
+                for p in base.rglob(pattern):
-                    # Prune skipped dirs before descending (unlike rglob which
+                    if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
-                    # descends first then filters — fatal on large node_modules).
+                        continue
                    dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
                    for name in fns + dns:
                        full = os.path.join(dp, name)
                        rel = os.path.relpath(full, base).replace(os.sep, "/")
                        if regex.fullmatch(rel) or regex.fullmatch(name):
                    try:
-                                mtime = os.stat(full).st_mtime
+                        mtime = p.stat().st_mtime
                    except OSError:
                        mtime = 0
-                            matched.append((mtime, full))
+                    matched.append((mtime, str(p)))
-                    if len(matched) > cap:
+                    if len(matched) > _CODENAV_MAX_HITS * 5:
                        break
-            except OSError as _e:
+            except (OSError, ValueError) as _e:
                return None, f"glob: {_e}"
            matched.sort(key=lambda t: t[0], reverse=True)
            return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
@@ -1,208 +0,0 @@
 """model_interaction_tools.py - agent tools for talking to other models.
 Owns the model-interaction tool implementations (chat_with_model, ask_teacher,
 list_models) and their handler classes, registered in ``TOOL_HANDLERS``. Part
 of the tool -> registry migration (#3629): the implementations were moved here
 out of ``src.ai_interaction`` so dispatch flows through the registry instead of
 the elif chain / dispatch_ai_tool in tool_execution.py.
 Shared helpers that still live in ``src.ai_interaction`` and are used by tools
 not yet migrated (``_resolve_model``, ``AI_CHAT_TIMEOUT``) are imported lazily
 inside the functions to avoid an import cycle at module load.
 """
 import logging
 from typing import Dict, Optional
 logger = logging.getLogger(__name__)
 _TEACHER_SYSTEM_PROMPT = (
    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
    "Provide clear, actionable guidance:\n"
    "1. Brief analysis of the problem\n"
    "2. Recommended approach (step by step)\n"
    "3. Key things to watch out for\n\n"
    "Be concise and practical. No preamble."
 )
 async def chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Relay a single user message to a named model and return its reply.

    ``content`` layout:
      first line  - model_name (or model_name@endpoint_name)
      remainder   - the message to send

    Returns ``{"model": ..., "response": ...}`` on success (response capped at
    10000 characters) or ``{"error": ...}`` when the input is malformed, the
    model cannot be resolved, or the call fails.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    message = remainder.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    conversation = [{"role": "user", "content": message}]
    try:
        response = await llm_call_async(
            url, model,
            conversation,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Keep oversized replies from flooding the tool output.
        if len(response) > 10000:
            response = response[:10000] + "\n... (truncated)"
        return {"model": model, "response": response}
    except Exception as e:
        logger.error(f"chat_with_model failed: {e}")
        return {"error": f"Failed to get response from {model_spec}: {e}"}
 async def ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Forward a problem description to a teacher model for guidance.

    ``content`` layout:
      first line  - model_name, or 'auto' to use the ``teacher_model`` setting
      remainder   - the problem description

    Returns ``{"model": ..., "response": ..., "teacher": True}`` on success
    (response capped at 8000 characters) or ``{"error": ...}`` otherwise.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async
    from src.settings import get_setting

    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    problem = remainder.strip()
    if not problem:
        return {"error": "No problem description provided"}

    # 'auto' (or a blank first line) defers to the configured teacher model.
    if model_spec == "" or model_spec.lower() == "auto":
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    prompt_messages = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        response = await llm_call_async(
            url, model,
            prompt_messages,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        if len(response) > 8000:
            response = response[:8000] + "\n... (truncated)"
        return {"model": model, "response": response, "teacher": True}
    except Exception as e:
        logger.error(f"ask_teacher failed: {e}")
        return {"error": f"Teacher call failed ({model_spec}): {e}"}
 async def list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the models offered by every enabled endpoint, grouped by endpoint.

    Args:
        content: Optional filter keyword (case-insensitive). A model is kept
            when the keyword occurs in its id or in the endpoint's name.
        session_id: Not used by this function; accepted so all tool
            implementations share one signature.
        owner: When truthy, endpoints are restricted with ``owner_filter`` and
            the value is forwarded to ``resolve_endpoint_runtime``.

    Returns:
        ``{"results": <markdown listing>}`` on success (including the "nothing
        found" messages) or ``{"error": <message>}`` on unexpected failure.
    """
    import asyncio
    import functools
    import json
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter
    from src.endpoint_resolver import resolve_endpoint_runtime, build_headers, build_models_url
    keyword = content.strip().lower() if content.strip() else None
    loop = asyncio.get_running_loop()
    db = SessionLocal()
    try:
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()
        if not endpoints:
            return {"results": "No enabled model endpoints configured."}
        result_lines = []
        total_models = 0
        for ep in endpoints:
            try:
                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
            except Exception:
                # Endpoint cannot be resolved for this caller; leave it out.
                continue
            provider = _detect_provider(base)
            headers = build_headers(api_key, base)
            model_ids = []
            if provider == "anthropic":
                # Anthropic models come from the static list, not a network call.
                model_ids = list(ANTHROPIC_MODELS)
            else:
                try:
                    models_url = build_models_url(base)
                    if models_url:
                        # httpx.get is synchronous; calling it directly inside
                        # this coroutine would stall the event loop for up to
                        # 5s per endpoint. Run it in the default executor.
                        fetch = functools.partial(httpx.get, models_url, headers=headers, timeout=5)
                        r = await loop.run_in_executor(None, fetch)
                        r.raise_for_status()
                        data = r.json()
                        # OpenAI-style payload: {"data": [{"id": ...}, ...]}
                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                        if not model_ids:
                            # Fallback payload shape: {"models": [{"name"|"model": ...}]}
                            model_ids = [
                                m.get("name") or m.get("model")
                                for m in (data.get("models") or [])
                                if m.get("name") or m.get("model")
                            ]
                    else:
                        model_ids = json.loads(ep.cached_models or "[]")
                except Exception:
                    model_ids = ["(endpoint offline)"]
            if keyword:
                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
            if model_ids:
                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                for mid in model_ids:
                    result_lines.append(f"  - `{mid}`")
                    total_models += 1
        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Handler classes registered in TOOL_HANDLERS
 # ---------------------------------------------------------------------------
 class ChatWithModelTool:
    """Registry handler that forwards to ``chat_with_model``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call ``chat_with_model`` with the session id and owner from ``ctx``."""
        caller_session = ctx.get("session_id")
        caller_owner = ctx.get("owner")
        return await chat_with_model(content, caller_session, owner=caller_owner)
 class AskTeacherTool:
    """Registry handler that forwards to ``ask_teacher``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call ``ask_teacher`` with the session id and owner from ``ctx``."""
        caller_session = ctx.get("session_id")
        caller_owner = ctx.get("owner")
        return await ask_teacher(content, caller_session, owner=caller_owner)
 class ListModelsTool:
    """Registry handler that forwards to ``list_models``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call ``list_models`` with the session id and owner from ``ctx``."""
        caller_session = ctx.get("session_id")
        caller_owner = ctx.get("owner")
        return await list_models(content, caller_session, owner=caller_owner)
@@ -1,464 +0,0 @@
 """session_tools.py - agent tools for AI-to-AI session management.
 Owns create_session, list_sessions, send_to_session and manage_session, moved
 out of src.ai_interaction as part of the tool -> registry migration (#3629), and
 their handler classes registered in TOOL_HANDLERS.
 The session manager is a runtime-set singleton in src.ai_interaction, so each
 function fetches it via get_session_manager() (imported here); _resolve_model and
 AI_CHAT_TIMEOUT are reused from there too.
 """
 import json
 import logging
 import uuid
 from typing import Dict, Optional
 from src.ai_interaction import get_session_manager, _resolve_model, AI_CHAT_TIMEOUT
 logger = logging.getLogger(__name__)
 async def create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a new chat session bound to a resolved model.

    ``content`` layout:
      line 1 - session name
      line 2 - model_name (or model_name@endpoint_name)

    Returns ``{"session_id", "name", "model", "endpoint_url"}`` on success or
    ``{"error": ...}`` when input is malformed, the model cannot be resolved,
    or the session manager fails.
    """
    manager = get_session_manager()
    if not manager:
        return {"error": "Session manager not available"}

    parts = content.strip().split("\n")
    if len(parts) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}
    name, model_spec = parts[0].strip(), parts[1].strip()
    if not name:
        return {"error": "Session name cannot be empty"}

    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    # Short id: the first 8 characters of a random UUID.
    sid = str(uuid.uuid4())[:8]
    try:
        _ = manager.create_session(
            session_id=sid,
            name=name,
            endpoint_url=url,
            model=model,
            rag=False,
            owner=owner,
        )
        # Remember the resolved headers on the session so later calls reuse them.
        created = manager.get_session(sid)
        if created:
            if headers:
                created.headers = headers
        # Event dispatch is best-effort: a failure here must not fail creation.
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            logger.debug("session_created event dispatch failed", exc_info=True)
        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
    except Exception as e:
        logger.error(f"create_session failed: {e}")
        return {"error": f"Failed to create session: {e}"}
 async def list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions, most-recently-active first.

    Each row carries a relative "last active" label so the agent can answer
    "open my last chat" without guessing from titles; the most recent session
    is always first. At most 50 rows are rendered.

    Args:
        content: Optional filter keyword, matched case-insensitively against
            the session name.
        session_id: Not used by this function; accepted so all tool
            implementations share one signature.
        owner: Passed to ``get_sessions_for_user`` to scope the listing.

    Returns:
        ``{"results": <markdown listing>}`` or ``{"error": <message>}``.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}
    keyword = content.strip().lower() if content.strip() else None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone
        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()
        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)
        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
            rows.append((ts, sid, sess))

        def _recency(row):
            # Sort on a plain float so naive and tz-aware timestamps can be
            # mixed. Comparing them directly (or against the naive
            # datetime.min placeholder) raises TypeError and would fail the
            # whole listing. Naive values are treated as UTC, matching _rel.
            stamp = row[0]
            if not stamp:
                return float("-inf")
            try:
                if stamp.tzinfo is None:
                    stamp = stamp.replace(tzinfo=timezone.utc)
                return stamp.timestamp()
            except Exception:
                return float("-inf")

        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        rows.sort(key=_recency, reverse=True)

        def _rel(ts):
            if not ts:
                return 'never'
            # Naive "now" in UTC without the deprecated datetime.utcnow().
            now = datetime.now(timezone.utc).replace(tzinfo=None)
            try:
                if ts.tzinfo is not None:
                    now = datetime.now(timezone.utc)
                diff = (now - ts).total_seconds()
            except Exception:
                return 'unknown'
            if diff < 60: return 'just now'
            if diff < 3600: return f'{int(diff / 60)}m ago'
            if diff < 86400: return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        lines = []
        for i, (ts, sid, sess) in enumerate(rows):
            if i >= 50:
                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
                break
            # Escape brackets so the name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
 async def send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Post a message into an existing session and return the model's reply.

    ``content`` layout:
      first line  - target session_id
      remainder   - the message

    On success both the user message and the assistant reply are appended to
    the target session, and ``{"session_id", "session_name", "response"}`` is
    returned (response capped at 10000 characters). Failures return
    ``{"error": ...}``.
    """
    manager = get_session_manager()
    from src.llm_core import llm_call_async
    from core.models import ChatMessage

    if not manager:
        return {"error": "Session manager not available"}

    first_line, newline, remainder = content.strip().partition("\n")
    if not newline:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = first_line.strip()
    message = remainder.strip()

    sess = manager.get_session(target_sid)
    if not sess:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: another user's session is reported as missing, not forbidden.
    if owner and getattr(sess, "owner", None) and sess.owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}

    try:
        # Prior history plus the new user turn forms the prompt.
        history = sess.get_context_messages()
        history.append({"role": "user", "content": message})
        response = await llm_call_async(
            sess.endpoint_url, sess.model, history,
            headers=sess.headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Persist the full exchange before trimming the copy we return.
        sess.add_message(ChatMessage("user", message))
        sess.add_message(ChatMessage("assistant", response))
        if len(response) > 10000:
            response = response[:10000] + "\n... (truncated)"
        reply = {
            "session_id": target_sid,
            "session_name": sess.name,
            "response": response,
        }
        return reply
    except Exception as e:
        logger.error(f"send_to_session failed: {e}")
        return {"error": f"Failed to send to session: {e}"}
 async def manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Manage sessions: list, switch, rename, archive, delete, important, truncate, fork.

    Content is either the structured JSON object the tool schema advertises
    (``{"action", "session_id", "value"}``, plus ``filter`` for ``list``) or
    the legacy line-based format:
      Line 1: action (list|switch|rename|archive|unarchive|delete|important|unimportant|truncate|fork)
      Line 2: target session_id (or "current" to use the active session)
      Line 3: action-specific param (new name for rename, keep_count for truncate/fork)

    Args:
        content: JSON object or line-based command as described above.
        session_id: Id of the active chat; substituted for "current" and
            protected from deletion.
        owner: When not None, database lookups are restricted to sessions
            owned by this user.

    Returns:
        A dict with ``action``/``session_id``/``results`` (plus
        action-specific keys) on success, otherwise ``{"error": ...}``.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}
    from src.database import SessionLocal, Session as DbSession
    # Accept BOTH the structured JSON args the tool schema advertises
    # ({action, session_id, value}) AND the legacy line-based format
    # (line1=action, line2=session_id, line3=value). Native function-calling
    # models send JSON; fenced-block callers send lines. Previously only the
    # line format was parsed, so a model that followed the schema (JSON) got
    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
    _raw = (content or "").strip()
    action = ""
    target_sid = ""
    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
    _list_filter = ""
    _parsed = None
    if _raw.startswith("{"):
        try:
            _parsed = json.loads(_raw)
        except Exception:
            # Not valid JSON after all: fall through to the line parser.
            _parsed = None
    if isinstance(_parsed, dict):
        action = str(_parsed.get("action") or "").strip().lower()
        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
        _v = _parsed.get("value")
        if _v is None:
            # Accept the common aliases models use for the action parameter.
            _v = (_parsed.get("name") or _parsed.get("new_name")
                  or _parsed.get("title") or _parsed.get("keep_count"))
        value = None if _v is None else str(_v).strip()
        _list_filter = str(_parsed.get("filter") or "").strip()
    else:
        lines = _raw.split("\n")
        if not lines or not lines[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = lines[0].strip().lower()
        target_sid = lines[1].strip() if len(lines) >= 2 else ""
        value = lines[2].strip() if len(lines) >= 3 else None
        # For `list`, everything after the action line is the filter keyword.
        _list_filter = "\n".join(lines[1:]).strip()
    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
    # `list` alias - dispatch to list_sessions so the agent's natural
    # first guess (every other manage_* tool has a `list` action) works.
    if action == "list":
        return await list_sessions(_list_filter, session_id, owner=owner)
    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}
    # Allow "current" to refer to the active session
    if target_sid.lower() == "current" and session_id:
        target_sid = session_id
    # Shared "not found" reply; built once the target id is final.
    _missing = {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}

    def _session_query(db):
        # Look the target up by id, scoped to the caller when an owner is given.
        query = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            query = query.filter(DbSession.owner == owner)
        return query

    # `switch` / `open` / `select` / `view` - the agent reaches for
    # these when the user asks to "open" or "switch to" a session.
    # There's no server-side way to make the browser navigate, so we
    # just return a clickable anchor link the user can click. The
    # frontend's chat-history click delegate routes `#session-<id>`
    # to selectSession(). The agent's reply naturally embeds this
    # result so the user sees a single clickable line.
    if action in ("switch", "open", "select", "view"):
        db = SessionLocal()
        try:
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            name = db_sess.name or target_sid
        finally:
            db.close()
        return {
            "action": action,
            "session_id": target_sid,
            "name": name,
            "results": f"[{name}](#session-{target_sid}) - click to open.",
        }
    db = SessionLocal()
    try:
        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            db_sess.name = new_name
            db.commit()
            # Keep the in-memory session in step with the database row.
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}
        elif action == "archive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            db_sess.archived = True
            db.commit()
            return {"action": "archive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' archived"}
        elif action == "unarchive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            db_sess.archived = False
            db.commit()
            return {"action": "unarchive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' unarchived"}
        elif action == "delete":
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
            try:
                ok = _session_manager.delete_session(target_sid)
                if not ok:
                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
                return {"action": "delete", "session_id": target_sid,
                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}
        elif action in ("important", "unimportant"):
            is_important = action == "important"
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            # Prevent AI from unstarring sessions - only the user can do that manually
            if not is_important and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
            db_sess.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {status}"}
        elif action == "truncate":
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            keep_count = 10
            if value:
                # Truncation discards messages, so an unparseable or negative
                # count is rejected instead of silently falling back to 10.
                try:
                    keep_count = int(value)
                except ValueError:
                    return {"error": f"truncate needs an integer keep_count, got '{value}'"}
                if keep_count < 0:
                    return {"error": "truncate keep_count must be zero or greater"}
            success = _session_manager.truncate_messages(target_sid, keep_count)
            if success:
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}
        elif action == "fork":
            db_sess = _session_query(db).first()
            if not db_sess:
                return _missing
            keep_count = 0  # 0 = all messages
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    # Forking is non-destructive: a bad count copies everything.
                    pass
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}
            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )
            # Copy messages (keep_count > 0 keeps only the first N)
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            new_sess = _session_manager.get_session(new_sid)
            if new_sess is None:
                return {"error": f"Failed to create fork of session '{target_sid}'"}
            # The fork talks to the same endpoint as its source, so carry
            # over any request headers stored on the source session.
            src_headers = getattr(source, "headers", None)
            if src_headers:
                new_sess.headers = dict(src_headers)
            for msg in history:
                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                # Event dispatch is best-effort; the fork itself succeeded.
                logger.debug("session_created event dispatch failed", exc_info=True)
            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
        else:
            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Handler classes registered in TOOL_HANDLERS
 # ---------------------------------------------------------------------------
 class CreateSessionTool:
    """Tool handler that forwards to ``create_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``create_session`` with the caller's session id and owner from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await create_session(content, active_sid, owner=caller)
 class ListSessionsTool:
    """Tool handler that forwards to ``list_sessions``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``list_sessions`` with the caller's session id and owner from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await list_sessions(content, active_sid, owner=caller)
 class SendToSessionTool:
    """Tool handler that forwards to ``send_to_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``send_to_session`` with the caller's session id and owner from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await send_to_session(content, active_sid, owner=caller)
 class ManageSessionTool:
    """Tool handler that forwards to ``manage_session``."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Run ``manage_session`` with the caller's session id and owner from ``ctx``."""
        active_sid = ctx.get("session_id")
        caller = ctx.get("owner")
        return await manage_session(content, active_sid, owner=caller)
@@ -7,7 +7,6 @@ from src.constants import MAX_OUTPUT_CHARS
 class WebSearchTool:
    async def execute(self, content: str, ctx: dict) -> dict:
        from src.search import comprehensive_web_search
        progress_cb = ctx.get("progress_cb") if isinstance(ctx, dict) else None
        raw = content.strip()
        query = raw
        time_filter = None
@@ -38,12 +37,6 @@ class WebSearchTool:
            elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
                time_filter = "week"
        loop = asyncio.get_running_loop()
        if progress_cb:
            await progress_cb({
                "elapsed_s": 0,
                "tail": f"Searching web for: {query[:160]}",
            })
        try:
        text, sources = await asyncio.wait_for(
            loop.run_in_executor(
                None,
@@ -56,21 +49,6 @@ class WebSearchTool:
            ),
            timeout=30,
        )
        except asyncio.TimeoutError:
            return {
                "error": f"web_search timed out after 30s: {query[:200]}",
                "exit_code": 1,
            }
        except Exception as e:
            return {
                "error": f"web_search failed: {type(e).__name__}: {str(e) or 'no details'}",
                "exit_code": 1,
            }
        if progress_cb:
            await progress_cb({
                "elapsed_s": 30,
                "tail": "Search completed; preparing sources.",
            })
        output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
        if sources:
            output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
@@ -79,23 +57,13 @@ class WebSearchTool:
 class WebFetchTool:
    async def execute(self, content: str, ctx: dict) -> dict:
        from src.search.content import fetch_webpage_content
        from src.constants import WEB_FETCH_HARD_MAX_BYTES
        raw = content.strip()
        url = ""
        max_bytes = None
        if raw.startswith("{"):
            try:
                parsed = json.loads(raw)
                if isinstance(parsed, dict):
                    url = str(parsed.get("url") or "").strip()
                    # Download-budget override (#3812): "full": true raises the
                    # budget to the hard cap; an explicit max_bytes is clamped
                    # to the hard cap downstream. Default stays the soft cap.
                    if parsed.get("full") is True:
                        max_bytes = WEB_FETCH_HARD_MAX_BYTES
                    mb = parsed.get("max_bytes")
                    if isinstance(mb, int) and mb > 0:
                        max_bytes = mb
            except json.JSONDecodeError:
                url = ""
        if not url:
@@ -110,7 +78,7 @@ class WebFetchTool:
        loop = asyncio.get_running_loop()
        try:
            result = await asyncio.wait_for(
-                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10, max_bytes=max_bytes)),
+                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
                timeout=30,
            )
        except asyncio.TimeoutError:
@@ -126,28 +94,8 @@ class WebFetchTool:
                return {"error": f"web_fetch: {url}: {err}", "exit_code": 1}
            return {"error": f"web_fetch: {url}: no readable text content (not HTML, or the page needs JS/login)", "exit_code": 1}
        # Tell the model when the download budget cut the body short and how
        # to get the rest, instead of silently presenting a partial page as
        # the whole thing.
        size_note = ""
        if result.get("truncated"):
            fetched = result.get("fetched_bytes") or 0
            total = result.get("total_bytes")
            total_txt = f" of {total:,} bytes" if total else ""
            size_note = (
                f"[partial content: download stopped at {fetched:,} bytes{total_txt}. "
                f'Re-call with {{"url": "{url}", "full": true}} to fetch up to '
                f"{WEB_FETCH_HARD_MAX_BYTES:,} bytes.]\n\n"
            )
        # The notice must lead the output so the MAX_OUTPUT_CHARS trim below can
        # never drop it. The title is untrusted, uncapped page content, so a
        # giant title ahead of the notice could push it out of range; keep the
        # notice first and cap the title as a second guard.
        if len(title) > 300:
            title = title[:300] + "..."
        header = (f"# {title}\n" if title else "") + f"Source: {url}\n\n"
-        output = size_note + header + text
+        output = header + text
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
        return {"output": output, "exit_code": 0}
@@ -1,14 +1,8 @@
 """
 ai_interaction.py
-AI-to-AI interaction tools: pipeline and manage_memory, plus shared model
+AI-to-AI interaction tools: chat_with_model, create_session, list_sessions,
-resolution (_resolve_model), the session-manager singleton, and dispatch_ai_tool.
+send_to_session, pipeline.
 As part of the tool -> registry migration (#3629), chat_with_model, ask_teacher
 and list_models moved to src/agent_tools/model_interaction_tools.py, and
 create_session, list_sessions, send_to_session and manage_session moved to
 src/agent_tools/session_tools.py. Those modules reuse get_session_manager /
 _resolve_model / AI_CHAT_TIMEOUT from here.
 These are agent tools — the LLM writes fenced code blocks and they execute
 through the standard agent_tools.py pipeline.
@@ -165,6 +159,440 @@ def _resolve_model(spec: str, owner: Optional[str] = None) -> Tuple[str, str, Di
 # Tool implementations
 # ---------------------------------------------------------------------------
 async def do_chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """One-shot chat: send a single user message to a named model.

    Content format:
      Line 1: model_name (or model_name@endpoint_name)
      Line 2+: the message to send

    Returns ``{"model", "response"}`` (response capped at 10000 characters)
    or ``{"error": ...}`` when the input is malformed, the model cannot be
    resolved, or the call fails.
    """
    from src.llm_core import llm_call_async
    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    message = remainder.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}
    prompt = [{"role": "user", "content": message}]
    try:
        response = await llm_call_async(url, model, prompt, headers=headers, timeout=AI_CHAT_TIMEOUT)
        # Truncate very long responses
        too_long = len(response) > 10000
        if too_long:
            response = response[:10000] + "\n... (truncated)"
        return {"model": model, "response": response}
    except Exception as e:
        logger.error(f"chat_with_model failed: {e}")
        return {"error": f"Failed to get response from {model_spec}: {e}"}
 # System prompt given to the teacher model in do_ask_teacher.
 _TEACHER_SYSTEM_PROMPT = "\n".join((
    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
    "Provide clear, actionable guidance:",
    "1. Brief analysis of the problem",
    "2. Recommended approach (step by step)",
    "3. Key things to watch out for",
    "",
    "Be concise and practical. No preamble.",
 ))


 async def do_ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Escalate a problem to a more capable "teacher" model.

    Content format:
      Line 1: model_name (or 'auto' to use the ``teacher_model`` setting)
      Line 2+: the problem description

    Returns ``{"model", "response", "teacher": True}`` (response capped at
    8000 characters) or ``{"error": ...}``.
    """
    from src.llm_core import llm_call_async
    from src.settings import get_setting
    spec_line, _, body = content.strip().partition("\n")
    model_spec = spec_line.strip()
    problem = body.strip()
    if not problem:
        return {"error": "No problem description provided"}
    wants_default = model_spec.lower() in ("auto", "")
    if wants_default:
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}
    conversation = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        response = await llm_call_async(url, model, conversation, headers=headers, timeout=AI_CHAT_TIMEOUT)
        if len(response) > 8000:
            response = response[:8000] + "\n... (truncated)"
        return {"model": model, "response": response, "teacher": True}
    except Exception as e:
        logger.error(f"ask_teacher failed: {e}")
        return {"error": f"Teacher call failed ({model_spec}): {e}"}
 async def do_second_opinion(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Ask another model to review the current conversation, then let the
    session's own model weigh that review and produce a unified answer.

    Content format:
      Line 1: model_name (or model_name@endpoint_name)
      Line 2+ (optional): specific question or focus area

    Flow:
      1. Collect the last 15 context messages of the current session
      2. Send them to the reviewer model and get its feedback
      3. Send the feedback to the session's own model to evaluate and unify
      4. Return both the review and the unified response as one markdown text

    Returns ``{"model", "response", "instruction"}`` or ``{"error": ...}``.
    """
    from src.llm_core import llm_call_async
    spec_line, _, focus_part = content.strip().partition("\n")
    model_spec = spec_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    focus = focus_part.strip()
    try:
        reviewer_url, reviewer_model, reviewer_headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}

    # Pull recent conversation context from current session
    sess = None
    if session_id and _session_manager:
        sess = _session_manager.get_session(session_id)
    snippets = []
    if sess:
        for entry in sess.get_context_messages()[-15:]:
            speaker = entry.get("role", "unknown").upper()
            body = entry.get("content", "")
            if isinstance(body, list):
                # Multi-part content: keep only the text of dict parts.
                body = " ".join(
                    part.get("text", "") for part in body if isinstance(part, dict)
                )
            if body:
                snippets.append(f"[{speaker}]: {body[:2000]}")
    context_text = "\n\n".join(snippets)
    if not context_text:
        return {"error": "No conversation context found to review"}

    # ── Step 1: Get the reviewer's feedback ──
    reviewer_system = (
        "You are giving a second opinion on a conversation between a user and an AI assistant. "
        "Your job is to be genuinely helpful and honest — not a yes-man, but not a contrarian either.\n\n"
        "Guidelines:\n"
        "- If the plan/idea is solid, say so clearly. Don't manufacture problems that aren't there.\n"
        "- If you spot a real flaw, blind spot, or simpler approach — call it out directly.\n"
        "- Be practical. Don't over-engineer or over-analyze. Real-world tradeoffs matter.\n"
        "- If there's a meaningfully better way to do something, suggest it concretely.\n"
        "- Give credit where it's due — highlight what's working well.\n"
        "- Keep it concise and actionable. No fluff.\n"
        "- You're a second pair of eyes, not a professor grading a paper."
    )
    if focus:
        closing = f"\n\n---\nSpecifically, I want your take on: {focus}"
    else:
        closing = "\n\n---\nGive me your honest second opinion on what's being discussed."
    reviewer_message = f"Here's the conversation so far:\n\n{context_text}" + closing
    reviewer_chat = [
        {"role": "system", "content": reviewer_system},
        {"role": "user", "content": reviewer_message},
    ]
    try:
        review = await llm_call_async(
            reviewer_url, reviewer_model, reviewer_chat,
            headers=reviewer_headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        if len(review) > 8000:
            review = review[:8000] + "\n... (truncated)"
    except Exception as e:
        logger.error(f"second_opinion reviewer call failed: {e}")
        return {"error": f"Failed to get second opinion from {model_spec}: {e}"}

    # ── Step 2: Send review back to session's own model for evaluation ──
    unified = ""
    original_model = "unknown"
    if sess:
        original_model = sess.model
        unify_system = (
            "Another AI model just reviewed the conversation you've been having with the user. "
            "Read their feedback carefully, then respond with:\n\n"
            "1. **What you agree with** — acknowledge valid points honestly.\n"
            "2. **What you disagree with** — explain why, briefly.\n"
            "3. **Unified version** — produce an updated/refined version of whatever was being discussed, "
            "incorporating the feedback you found valid. Don't accept every note blindly — "
            "use your judgment on what actually improves things vs what's unnecessary.\n\n"
            "Be concise and practical. The user wants a better result, not a meta-discussion."
        )
        unify_message = (
            f"Here's the conversation context:\n\n{context_text}\n\n"
            f"---\n\n"
            f"**Review from {reviewer_model}:**\n\n{review}\n\n"
            f"---\n\n"
            f"Evaluate this feedback and produce a unified improved version."
        )
        unify_chat = [
            {"role": "system", "content": unify_system},
            {"role": "user", "content": unify_message},
        ]
        try:
            unified = await llm_call_async(
                sess.endpoint_url, original_model, unify_chat,
                headers=getattr(sess, "headers", None) or {},
                timeout=AI_CHAT_TIMEOUT,
            )
            if len(unified) > 10000:
                unified = unified[:10000] + "\n... (truncated)"
        except Exception as e:
            # The review is still worth returning even if unification fails.
            logger.error(f"second_opinion unify call failed: {e}")
            unified = f"(Failed to get unified response: {e})"

    # Build combined result
    sections = (
        f"## Second Opinion from {reviewer_model}\n\n{review}",
        f"## {original_model}'s Response\n\n{unified}",
    )
    return {
        "model": reviewer_model,
        "response": "\n\n---\n\n".join(sections),
        "instruction": "Present these results to the user exactly as they are. Do NOT call second_opinion again. The user can continue the conversation from here.",
    }
 async def do_create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a new chat session bound to a resolved model.

    Content format:
      Line 1: session name
      Line 2: model_name (or model_name@endpoint_name)

    Returns ``{"session_id", "name", "model", "endpoint_url"}`` on success,
    otherwise ``{"error": ...}``.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    fields = content.strip().split("\n")
    if len(fields) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}
    name, model_spec = (part.strip() for part in fields[:2])
    if not name:
        return {"error": "Session name cannot be empty"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as e:
        return {"error": str(e)}
    # Short id: the first 8 hex characters of a random UUID.
    sid = uuid.uuid4().hex[:8]
    try:
        _session_manager.create_session(
            session_id=sid, name=name, endpoint_url=url,
            model=model, rag=False, owner=owner,
        )
        # Store headers on session for future calls
        created = _session_manager.get_session(sid)
        if created and headers:
            created.headers = headers
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            # Notification is best-effort; the session already exists.
            logger.debug("session_created event dispatch failed", exc_info=True)
        return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
    except Exception as e:
        logger.error(f"create_session failed: {e}")
        return {"error": f"Failed to create session: {e}"}
 async def do_list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions sorted by most-recently-active first.

    Output includes a relative "last active" timestamp per row so the
    agent can answer "open my last chat" without guessing from titles.
    The most-recent session is always first in the list; sessions without
    any timestamp sink to the bottom. At most 50 rows are rendered.

    Args:
        content: Optional filter keyword (case-insensitive match on the
            session name).
        session_id: Id of the calling session. Not used by this function.
        owner: User whose sessions are listed.

    Returns:
        ``{"results": <markdown text>}`` or ``{"error": ...}``.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    keyword = (content or "").strip().lower() or None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone

        def _as_utc(ts):
            # Naive timestamps are taken to be UTC so that naive and
            # timezone-aware values can be compared and subtracted safely.
            return ts if ts.tzinfo is not None else ts.replace(tzinfo=timezone.utc)

        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()
        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)
        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
            rows.append((ts, sid, sess))
        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        # The key normalizes every timestamp to aware UTC: comparing a naive
        # datetime with an aware one raises TypeError, which previously made
        # the whole listing fail when the two kinds were mixed.
        oldest = datetime.min.replace(tzinfo=timezone.utc)
        rows.sort(
            key=lambda r: (r[0] is not None, _as_utc(r[0]) if r[0] is not None else oldest),
            reverse=True,
        )

        def _rel(ts):
            # Human-friendly age of a timestamp ("5m ago", "2h ago", ...).
            if not ts:
                return 'never'
            try:
                diff = (datetime.now(timezone.utc) - _as_utc(ts)).total_seconds()
            except Exception:
                return 'unknown'
            if diff < 60: return 'just now'
            if diff < 3600: return f'{int(diff / 60)}m ago'
            if diff < 86400: return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        lines = []
        for i, (ts, sid, sess) in enumerate(rows):
            if i >= 50:
                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
                break
            # Escape brackets so a session name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
 async def do_send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a message to an existing session and return the model's reply.

    Content format:
      Line 1: session_id of the target session
      Line 2+: message

    Args:
        content: Target session id on the first line, message on the rest.
        session_id: Id of the calling session. Not used by this function.
        owner: Caller identity. When both the caller and the target session
            carry an owner and they differ, the session is reported as
            "not found" rather than revealing that it exists.

    Returns:
        ``{"session_id", "session_name", "response"}`` on success (the
        response is capped at 10000 characters), otherwise ``{"error": ...}``.
    """
    from src.llm_core import llm_call_async
    from core.models import ChatMessage
    if not _session_manager:
        return {"error": "Session manager not available"}
    # Tolerate a missing payload instead of raising on None.strip().
    lines = (content or "").strip().split("\n", 1)
    if len(lines) < 2:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = lines[0].strip()
    message = lines[1].strip()
    sess = _session_manager.get_session(target_sid)
    if not sess:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: reject access to another user's session
    if owner and getattr(sess, "owner", None) and sess.owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}
    try:
        # Build the request from a copy of the history so the pending user
        # turn is never appended to a list the session may still own.
        context = list(sess.get_context_messages())
        context.append({"role": "user", "content": message})
        # Headers are only attached to a session when non-empty, so the
        # attribute may be absent; fall back to no extra headers.
        headers = getattr(sess, "headers", None) or {}
        response = await llm_call_async(
            sess.endpoint_url, sess.model, context,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Save both messages to session (full, untruncated reply)
        sess.add_message(ChatMessage("user", message))
        sess.add_message(ChatMessage("assistant", response))
        # Truncate for tool output
        if len(response) > 10000:
            response = response[:10000] + "\n... (truncated)"
        return {
            "session_id": target_sid,
            "session_name": sess.name,
            "response": response,
        }
    except Exception as e:
        logger.error(f"send_to_session failed: {e}")
        return {"error": f"Failed to send to session: {e}"}
 async def stream_ai_tool(tool: str, content: str, session_id: Optional[str] = None, owner: Optional[str] = None):
@@ -287,6 +715,229 @@ async def do_pipeline(content: str, session_id: Optional[str] = None, owner: Opt
 # Session management tool
 # ---------------------------------------------------------------------------
 async def do_manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Manage chat sessions on behalf of the agent.
    Supported actions: list, switch (aliases: open, select, view), rename,
    archive, unarchive, delete, important, unimportant, truncate, fork.
    ``content`` is accepted in two formats:
      * JSON object: ``{"action": ..., "session_id": ..., "value": ...}``.
        ``session`` / ``id`` are accepted as aliases for ``session_id``;
        ``name`` / ``new_name`` / ``title`` / ``keep_count`` are used when
        ``value`` is absent; ``filter`` is forwarded to the ``list`` action.
      * Legacy line format:
          Line 1: action
          Line 2: target session id (or "current" for the active session)
          Line 3: action parameter (new name for rename, keep_count for
                  truncate / fork)
        For ``list``, everything after line 1 is used as the filter.
    Args:
        content: The request, in one of the two formats above.
        session_id: Id of the active session. It is what "current" resolves
            to, and ``delete`` refuses to remove it.
        owner: When not None, database lookups only match sessions whose
            ``owner`` column equals this value.
    Returns:
        A dict describing the outcome (always containing ``results`` for the
        actions handled here), or ``{"error": <message>}`` on failure.
        Exceptions raised while handling rename/archive/unarchive/delete/
        important/unimportant/truncate/fork are logged and returned as an
        error dict; the switch path has no such handler.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    from src.database import SessionLocal, Session as DbSession
    # Accept BOTH the structured JSON args the tool schema advertises
    # ({action, session_id, value}) AND the legacy line-based format
    # (line1=action, line2=session_id, line3=value). Native function-calling
    # models send JSON; fenced-block callers send lines. Previously only the
    # line format was parsed, so a model that followed the schema (JSON) got
    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
    _raw = (content or "").strip()
    action = ""
    target_sid = ""
    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
    _list_filter = ""
    _parsed = None
    # Only attempt JSON when the payload looks like an object; a parse failure
    # (or a non-dict result) falls through to the line-based parser below.
    if _raw.startswith("{"):
        try:
            _parsed = json.loads(_raw)
        except Exception:
            _parsed = None
    if isinstance(_parsed, dict):
        action = str(_parsed.get("action") or "").strip().lower()
        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
        _v = _parsed.get("value")
        # No explicit "value": fall back to the first truthy alias key.
        if _v is None:
            _v = (_parsed.get("name") or _parsed.get("new_name")
                  or _parsed.get("title") or _parsed.get("keep_count"))
        value = None if _v is None else str(_v).strip()
        _list_filter = str(_parsed.get("filter") or "").strip()
    else:
        lines = _raw.split("\n")
        if not lines or not lines[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = lines[0].strip().lower()
        target_sid = lines[1].strip() if len(lines) >= 2 else ""
        value = lines[2].strip() if len(lines) >= 3 else None
        # For `list`, everything after the action line is the filter text.
        _list_filter = "\n".join(lines[1:]).strip()
    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
    # `list` alias — dispatch to do_list_sessions so the agent's natural
    # first guess (every other manage_* tool has a `list` action) works.
    if action == "list":
        return await do_list_sessions(_list_filter, session_id, owner=owner)
    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}
    # Allow "current" to refer to the active session. Without an active
    # session_id the literal "current" is kept and looked up as an id.
    if target_sid.lower() == "current" and session_id:
        target_sid = session_id
    # Shared lookup: the session row with id == target_sid, additionally
    # restricted to `owner` whenever an owner was supplied.
    def _session_query(db):
        query = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            query = query.filter(DbSession.owner == owner)
        return query
    # `switch` / `open` / `select` / `view` — the agent reaches for
    # these when the user asks to "open" or "switch to" a session.
    # There's no server-side way to make the browser navigate, so we
    # just return a clickable anchor link the user can click. The
    # frontend's chat-history click delegate routes `#session-<id>`
    # to selectSession(). The agent's reply naturally embeds this
    # result so the user sees a single clickable line.
    if action in ("switch", "open", "select", "view"):
        db = SessionLocal()
        try:
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            name = db_sess.name or target_sid
        finally:
            db.close()
        return {
            "action": action,
            "session_id": target_sid,
            "name": name,
            "results": f"[{name}](#session-{target_sid}) — click to open.",
        }
    # All remaining actions share one DB session, closed in the `finally`.
    db = SessionLocal()
    try:
        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.name = new_name
            db.commit()
            # Also push the new name to the session manager after the DB commit.
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}
        elif action == "archive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = True
            db.commit()
            return {"action": "archive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' archived"}
        elif action == "unarchive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = False
            db.commit()
            return {"action": "unarchive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' unarchived"}
        elif action == "delete":
            # Safety rails: never delete the chat we are running in, an
            # unknown id, or a session flagged important.
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if db_sess and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
            try:
                # Deletion is delegated to the session manager; a falsy
                # return is reported as "no longer exists".
                ok = _session_manager.delete_session(target_sid)
                if not ok:
                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
                return {"action": "delete", "session_id": target_sid,
                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}
        elif action in ("important", "unimportant"):
            is_important = action == "important"
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            # Prevent AI from unstarring sessions — only the user can do that manually
            if not is_important and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
            db_sess.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {status}"}
        elif action == "truncate":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            # Default keep_count is 10; a non-integer value is ignored and
            # the default is used instead.
            keep_count = 10
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    pass
            # NOTE(review): the result text says the *last* keep_count messages
            # are kept; that depends on truncate_messages — confirm there.
            success = _session_manager.truncate_messages(target_sid, keep_count)
            if success:
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}
        elif action == "fork":
            # The DB lookup enforces owner scoping before the in-memory
            # session is read from the session manager.
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            keep_count = 0  # 0 = all messages
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    pass
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}
            # New id is the first 8 characters of a random UUID4 string.
            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )
            # Copy messages: with keep_count > 0 only the FIRST keep_count
            # entries of the source's context messages are carried over.
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            new_sess = _session_manager.get_session(new_sid)
            for msg in history:
                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
            # Event dispatch is best-effort: a failure is logged at debug
            # level and does not fail the fork.
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                logger.debug("session_created event dispatch failed", exc_info=True)
            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
        else:
            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Memory management tool
 # ---------------------------------------------------------------------------
@@ -453,6 +1104,83 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
        return {"error": f"Unknown action '{action}'. Use: list, add, edit, delete, search"}
 # ---------------------------------------------------------------------------
 # List models tool
 # ---------------------------------------------------------------------------
 async def do_list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the models served by every enabled model endpoint.
    Args:
        content: Optional filter keyword (case-insensitive). A model is kept
            when the keyword occurs in its id or in the endpoint's name.
            Empty, whitespace-only or None means "no filter".
        session_id: Not used here; accepted so all AI tools share one
            signature.
        owner: When truthy, endpoints are narrowed with ``owner_filter`` and
            the value is forwarded to ``resolve_endpoint_runtime``.
    Returns:
        ``{"results": <markdown listing>}`` on success (including the
        "nothing configured" / "nothing matched" cases), or
        ``{"error": <message>}`` if an unexpected exception occurs.
    """
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter
    # `content or ""` keeps a None payload from raising before the try block.
    keyword = (content or "").strip().lower() or None
    db = SessionLocal()
    try:
        # `== True` is intentional: it builds a SQL expression, not a Python bool.
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()
        if not endpoints:
            return {"results": "No enabled model endpoints configured."}
        result_lines = []
        total_models = 0
        # Use the async client so a slow or dead endpoint does not block the
        # event loop for the duration of its timeout (this is a coroutine).
        async with httpx.AsyncClient(timeout=5) as client:
            for ep in endpoints:
                try:
                    base, api_key = resolve_endpoint_runtime(ep, owner=owner)
                except Exception:
                    # Endpoint cannot be resolved at runtime: leave it out.
                    continue
                provider = _detect_provider(base)
                headers = build_headers(api_key, base)
                model_ids = []
                offline = False
                if provider == "anthropic":
                    # Anthropic models come from the static list, not a probe.
                    model_ids = list(ANTHROPIC_MODELS)
                else:
                    try:
                        models_url = build_models_url(base)
                        if models_url:
                            r = await client.get(models_url, headers=headers)
                            r.raise_for_status()
                            data = r.json()
                            # Preferred shape: {"data": [{"id": ...}, ...]}.
                            model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                            if not model_ids:
                                # Fallback shape: {"models": [{"name"|"model": ...}, ...]}.
                                model_ids = [
                                    m.get("name") or m.get("model")
                                    for m in (data.get("models") or [])
                                    if m.get("name") or m.get("model")
                                ]
                        else:
                            # No models URL for this base: use the cached list.
                            model_ids = json.loads(ep.cached_models or "[]")
                    except Exception:
                        offline = True
                        model_ids = ["(endpoint offline)"]
                if keyword:
                    model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
                if model_ids:
                    result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                    for mid in model_ids:
                        result_lines.append(f"  - `{mid}`")
                    # The offline placeholder is shown but is not a model, so
                    # it must not inflate the reported total.
                    if not offline:
                        total_models += len(model_ids)
        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
@@ -1103,20 +1831,55 @@ async def dispatch_ai_tool(
 ) -> Tuple[str, Dict]:
    """Dispatch an AI interaction tool. Returns (description, result_dict)."""
-    if tool == "pipeline":
+    if tool == "chat_with_model":
        model_spec = content.split("\n")[0].strip()[:60]
        desc = f"chat_with_model: {model_spec}"
        result = await do_chat_with_model(content, session_id, owner=owner)
    elif tool == "create_session":
        name = content.split("\n")[0].strip()[:60]
        desc = f"create_session: {name}"
        result = await do_create_session(content, session_id, owner=owner)
    elif tool == "list_sessions":
        keyword = content.strip()[:40]
        desc = f"list_sessions{': ' + keyword if keyword else ''}"
        result = await do_list_sessions(content, session_id, owner=owner)
    elif tool == "send_to_session":
        sid = content.split("\n")[0].strip()[:20]
        desc = f"send_to_session: {sid}"
        result = await do_send_to_session(content, session_id, owner=owner)
    elif tool == "pipeline":
        desc = "pipeline: running steps"
        result = await do_pipeline(content, session_id, owner=owner)
    elif tool == "manage_session":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_session: {action}"
        result = await do_manage_session(content, session_id, owner=owner)
    elif tool == "manage_memory":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_memory: {action}"
        result = await do_manage_memory(content, session_id, owner=owner)
    elif tool == "list_models":
        keyword = content.strip()[:40]
        desc = f"list_models{': ' + keyword if keyword else ''}"
        result = await do_list_models(content, session_id, owner=owner)
    elif tool == "ui_control":
        action = content.split("\n")[0].strip()[:60]
        desc = f"ui_control: {action}"
        result = await do_ui_control(content, session_id, owner=owner)
    elif tool == "ask_teacher":
        problem = content.split("\n", 1)[-1].strip()[:60]
        desc = f"ask_teacher: {problem}"
        result = await do_ask_teacher(content, session_id, owner=owner)
    else:
        desc = f"unknown ai tool: {tool}"
        result = {"error": f"Unknown AI interaction tool: {tool}"}
@@ -263,32 +263,10 @@ def list_for_session(session_id: str) -> List[Dict[str, Any]]:
    return [r for r in refresh().values() if r.get("session_id") == session_id]
 def kill(job_id: str) -> Optional[Dict[str, Any]]:
    """Stop a running background job and record it as killed.
    Looks the job up in the persisted job table. An unknown id yields None.
    A job whose status is not "running" is returned untouched, which makes
    repeated calls harmless. For a running job the process is terminated via
    ``_kill`` and the record is updated and saved; ``followed_up`` is set so
    the monitor does not also fire an auto-continue for a job that was
    stopped on purpose.
    """
    registry = _load()
    record = registry.get(job_id)
    if record is None:
        return None
    if record.get("status") != "running":
        # Already finished (or previously killed): nothing to change.
        return record
    _kill(record.get("pid"))
    record.update({
        "status": "failed",
        "exit_code": -1,
        "ended_at": time.time(),
        "killed": True,
        "followed_up": True,
    })
    _save(registry)
    return record
 def result_text(rec: Dict[str, Any]) -> str:
    """Human/agent-readable summary of a finished job, for the follow-up."""
    out = _read_output(rec)
-    if rec.get("killed"):
+    if rec.get("timed_out"):
        head = "Background job was killed."
    elif rec.get("timed_out"):
        head = f"Background job timed out after {rec.get('max_runtime_s')}s."
    elif rec.get("died"):
        head = "Background job process died unexpectedly (no exit code)."
@@ -76,7 +76,8 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
        import json
        import re
        from src.constants import DATA_DIR
-        from src.llm_core import llm_call_async_with_fallback
+        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async
        from src.memory import MemoryManager
        manager = MemoryManager(DATA_DIR)
@@ -115,9 +116,10 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
            if len(group_memories) < 2:
                return False
-            from src.task_endpoint import resolve_task_candidates
+            url, model, headers = resolve_endpoint("utility", owner=group_owner or None)
-            candidates = resolve_task_candidates(owner=group_owner or None)
+            if not url or not model:
-            if not candidates:
+                url, model, headers = resolve_endpoint("default", owner=group_owner or None)
            if not url or not model:
                return False
            try:
@@ -145,11 +147,13 @@ async def action_consolidate_memory(owner: str, **kwargs) -> Tuple[str, bool]:
                    "\"drop\":[{\"id\":\"existing id\",\"reason\":\"short reason\"}]}\n\n"
                    f"MEMORIES:\n{json.dumps(items, ensure_ascii=False)}"
                )
-                raw = await llm_call_async_with_fallback(
+                raw = await llm_call_async(
-                    candidates,
+                    url=url,
                    model=model,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.0,
                    max_tokens=4096,
                    headers=headers,
                    timeout=120,
                )
                from src.text_helpers import strip_think
@@ -600,7 +604,8 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
    try:
        from datetime import timedelta
        from core.database import SessionLocal, CalendarEvent
-        from src.llm_core import llm_call_async_with_fallback
+        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async
        import re as _re, json as _json
        db = SessionLocal()
@@ -615,9 +620,10 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
            if not events:
                return "No upcoming events to classify", True
-            from src.task_endpoint import resolve_task_candidates
+            llm_url, llm_model, llm_headers = resolve_endpoint("utility", owner=owner)
-            llm_candidates = resolve_task_candidates(owner=owner)
+            if not llm_url:
-            llm_available = bool(llm_candidates)
+                llm_url, llm_model, llm_headers = resolve_endpoint("default", owner=owner)
            llm_available = bool(llm_url and llm_model)
            # Pull user memories so the LLM has personal context (relationships,
            # job, hobbies). Helps it know e.g. "<name> is your spouse" so their
@@ -693,11 +699,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
                    f"EVENTS: {_json.dumps(items)}"
                )
                try:
-                    raw = await llm_call_async_with_fallback(
+                    raw = await llm_call_async(
-                        llm_candidates,
+                        url=llm_url, model=llm_model,
                        messages=[{"role": "user", "content": prompt}],
                        temperature=0.1, max_tokens=16384,
-                        timeout=180,
+                        headers=llm_headers, timeout=180,
                    )
                    from src.text_helpers import strip_think as _st
                    raw = _st(raw or "", prose=False, prompt_echo=False)
@@ -804,7 +810,8 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        import asyncio as _aio
        from datetime import datetime as _dt, timedelta as _td
        from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
-        from src.llm_core import llm_call_async_with_fallback
+        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async
        # 1. Pull recent UIDs + From headers cheaply (header-only fetch).
        def _pull_headers():
@@ -884,11 +891,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
        if not eligible:
            return "All sender sigs already cached (or no eligible senders)", True
-        from src.task_endpoint import resolve_task_candidates
+        url, model, headers = resolve_endpoint("utility", owner=owner)
-        candidates = resolve_task_candidates(owner=owner)
+        if not url or not model:
-        if not candidates:
+            url, model, headers = resolve_endpoint("default", owner=owner)
        if not url or not model:
            return "No LLM endpoint available", False
        model = candidates[0][1]
        analyzed = 0
        no_sig = 0
@@ -942,11 +949,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
            )
            try:
-                raw = await llm_call_async_with_fallback(
+                raw = await llm_call_async(
-                    candidates,
+                    url=url, model=model,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.0, max_tokens=600,
-                    timeout=60,
+                    headers=headers, timeout=60,
                )
                from src.text_helpers import strip_think as _st
                sig = _st(raw or "", prose=False, prompt_echo=False).strip()
@@ -1130,6 +1137,7 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
        from services.memory.skills import SkillsManager
        from src.constants import DATA_DIR
        from routes.skills_routes import _run_skill_test_once, _skill_test_task
        from src.endpoint_resolver import resolve_endpoint
        # #3 SCOPE GUARD: refuse to run on a None/empty owner — otherwise
        # `sm.load(owner=None)` returns every user's skills and we'd cross-
@@ -1144,40 +1152,27 @@ async def action_test_skills(owner: str, **kwargs) -> Tuple[str, bool]:
        if not names:
            raise TaskNoop("no skills to test")
-        from src.task_endpoint import resolve_task_candidates
+        url, model, headers = resolve_endpoint("default", owner=owner)
-        candidates = resolve_task_candidates(owner=owner)
+        if not url or not model:
        if not candidates:
            return "No Default/Utility model configured — set one in Settings.", False
        # #2 NO SILENT MODEL SWAP: if the configured model isn't served by the
        # endpoint, try a basename match — but fail loudly instead of grabbing
        # `avail[0]` which could be an embedding-only model and produce 36
        # garbage transcripts → 36 'unknown' verdicts with no hint why.
        url, model, headers = candidates[0]
        try:
            from src.llm_core import list_model_ids
            avail = list_model_ids(url, headers=headers)
            if avail and model not in avail:
                import os as _os
-
+                base = _os.path.basename((model or "").rstrip("/"))
-            selected = None
+                m = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
-            mismatch_notes = []
+                if m:
-            for cand_url, cand_model, cand_headers in candidates:
+                    model = m
-                avail = list_model_ids(cand_url, headers=cand_headers)
+                else:
-                if not avail or cand_model in avail:
+                    return (f"Default model '{model}' not served by endpoint {url}. "
-                    selected = (cand_url, cand_model, cand_headers)
+                            f"Available: {', '.join(avail[:8])}{'…' if len(avail) > 8 else ''}. "
-                    break
+                            "Set a valid Default model in Settings."), False
                base = _os.path.basename((cand_model or "").rstrip("/"))
                matched = next((a for a in avail if _os.path.basename(a.rstrip("/")) == base), None)
                if matched:
                    selected = (cand_url, matched, cand_headers)
                    break
                mismatch_notes.append(
                    f"{cand_model} not served by {cand_url}; available: "
                    f"{', '.join(avail[:8])}{'...' if len(avail) > 8 else ''}"
                )
            if selected:
                url, model, headers = selected
            elif mismatch_notes:
                return "No configured task fallback model is served. " + " | ".join(mismatch_notes[:3]), False
        except Exception as _e:
            logger.warning(f"test_skills model resolve check failed (continuing): {_e}")
@@ -1488,6 +1483,7 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
        from pathlib import Path as _P
        from core.database import SessionLocal as _SL, EmailAccount as _EA
        from routes.email_helpers import _imap_connect, _decode_header
        from src.endpoint_resolver import resolve_endpoint, resolve_utility_fallback_candidates
        from src.llm_core import llm_call_async_with_fallback
        # Per-owner state file so multi-user runs don't clobber each other's
@@ -1509,10 +1505,12 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
        # ── 1. Resolve LLM candidates (utility primary + utility fallbacks; fall
        # through to default chat as a last resort).
-        from src.task_endpoint import resolve_task_candidates
+        url, model, headers = resolve_endpoint("utility", owner=owner)
-        candidates = resolve_task_candidates(owner=owner)
+        if not url or not model:
-        if not candidates:
+            url, model, headers = resolve_endpoint("default", owner=owner)
        if not url or not model:
            return "No LLM endpoint available", False
        candidates = [(url, model, headers)] + resolve_utility_fallback_candidates(owner=owner)
        # ── 2. Enumerate enabled accounts. Match this task's owner AND fall
        # back to the legacy "unowned account whose imap_user / from_address
@@ -14,7 +14,6 @@ import subprocess
 import sys
 from core.platform_compat import IS_WINDOWS, which_tool
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
@@ -82,7 +81,7 @@ _BUILTIN_NPX_SERVERS = {
        "name": "Built-in: Browser",
        "command": "npx",
        "args": ["-y", "@playwright/mcp@latest", "--headless", "--caps", "vision"],
-    }
+    },
 }
 # Global flag to disable MCP if there are compatibility issues
@@ -95,7 +94,7 @@ async def register_builtin_servers(mcp_manager):
        logger.info("Built-in MCP servers disabled via ODYSSEUS_DISABLE_MCP")
        return
-    base_dir = get_app_root()
+    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    python = sys.executable
    async def _connect_python_server(server_id: str, script_path: str, name: str):
@@ -5,7 +5,6 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import Field, field_validator
 from src.constants import DATA_DIR as _DATA_DIR_CONST
 from src.runtime_paths import get_app_root
 # Cross-platform OS flag, exposed here so callers can `from src.config import
 # IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
@@ -20,7 +19,7 @@ IS_WINDOWS = os.name == "nt"
 class DataConfig(BaseSettings):
    """Configuration for data storage and file handling."""
    # Base directory
-    base_dir: Path = Field(default=Path(get_app_root()), description="Base directory for the application")
+    base_dir: Path = Field(default=Path(__file__).parent.parent, description="Base directory for the application")
    # Data paths
    data_dir: Path = Field(default=Path(_DATA_DIR_CONST), description="Main data directory")
@@ -139,7 +138,7 @@ class AppConfig(BaseSettings):
        if isinstance(v, dict) and "base_dir" in v:
            base_dir = v["base_dir"]
        else:
-            base_dir = Path(get_app_root())
+            base_dir = Path(__file__).parent.parent
        # Convert string paths to Path objects relative to base_dir
        data_dir = Path(_DATA_DIR_CONST)
@@ -2,14 +2,12 @@
 """Application-wide constants and configuration values."""
 import os
-from src.runtime_paths import get_app_root, get_default_data_dir
+APP_VERSION = "1.0.0"
 APP_VERSION = "1.0.1"
 # Base paths
-BASE_DIR = os.path.join(get_app_root(), "")
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
 STATIC_DIR = os.path.join(BASE_DIR, "static")
-DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", get_default_data_dir())
+DATA_DIR = os.getenv("ODYSSEUS_DATA_DIR", os.path.join(BASE_DIR, "data"))
 # Data file paths
 # Single source of truth: every persisted file/dir lives under DATA_DIR, which
@@ -57,13 +55,7 @@ MEMORY_VECTORS_DIR = os.path.join(DATA_DIR, "memory_vectors")
 # Paths with an intentional dedicated env override, defaulting under DATA_DIR.
 MAIL_ATTACHMENTS_DIR = os.getenv("ODYSSEUS_MAIL_ATTACHMENTS_DIR", os.path.join(DATA_DIR, "mail-attachments"))
-# `or` (not os.getenv's default arg) so a PRESENT-but-EMPTY value falls back to
+FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH", os.path.join(DATA_DIR, "fastembed_cache"))
 # the default. docker-compose.yml injects `FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}`,
 # which sets the var to "" when the host hasn't defined it. os.getenv(name, default)
 # only returns the default when the var is ABSENT, so the empty string would win →
 # os.makedirs("") raises [Errno 2] No such file or directory: '' → FastEmbed fails to
 # init and all vector features (RAG, semantic memory, tool index) silently degrade.
 FASTEMBED_CACHE_DIR = os.getenv("FASTEMBED_CACHE_PATH") or os.path.join(DATA_DIR, "fastembed_cache")
 # Agent tool output limits (single source of truth — imported by tool_execution.py,
 # tool_implementations.py, agent_tools.py, and any other module that needs them)
@@ -71,26 +63,11 @@ MAX_OUTPUT_CHARS = 10_000       # cap for bash/python/web_search/web_fetch outpu
 MAX_READ_CHARS = 20_000         # cap for read_file / document preview
 MAX_DIFF_LINES = 400            # cap for edit_file unified-diff display
 # web_fetch response-size policy (#3812). MAX_OUTPUT_CHARS above only trims
 # what the agent SEES; these caps bound what the server downloads, parses,
 # and writes to the content cache. The soft cap is the default download
 # budget; the agent can raise it per call (full/max_bytes) but never past
 # the hard cap, so a model can't decide to pull a multi-GB file.
 WEB_FETCH_SOFT_MAX_BYTES = 2_000_000    # default download budget (2 MB)
 WEB_FETCH_HARD_MAX_BYTES = 20_000_000   # absolute ceiling, even with override (20 MB)
 # API Configuration
 MAX_CONTEXT_MESSAGES = 90
 REQUEST_TIMEOUT = 20
 OPENAI_COMPAT_PATH = "/v1/chat/completions"
 # Outbound UA for web_fetch / web_search scraping; common desktop UA so pages serve normal HTML.
 WEB_FETCH_USER_AGENT = os.environ.get(
    "WEB_FETCH_USER_AGENT",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
 )
 # Environment variables with defaults
 DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
 LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
@@ -102,9 +79,6 @@ SEARXNG_INSTANCE = os.getenv("SEARXNG_INSTANCE", "http://localhost:8080")
 CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
 CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
 # Auth policy
 PASSWORD_MIN_LENGTH = 8
 # Default parameters
 DEFAULT_TEMPERATURE = 1.0
 DEFAULT_MAX_TOKENS = 0
@@ -31,8 +31,6 @@ import numpy as np
 import httpx
 from typing import List, Optional
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
 _DEFAULT_MODEL = "all-minilm:l6-v2"
@@ -161,32 +161,6 @@ def normalize_base(url: str) -> str:
    return url
 def _validated_endpoint_base(url: str) -> str:
    """Return a base URL that is safe for endpoint path appends."""
    base = (url or "").strip().rstrip("/")
    if "?" in base or "#" in base:
        raise ValueError("Endpoint base URL must not include query or fragment")
    return urlunparse(urlparse(base)._replace(query="", fragment="")).rstrip("/")
 def _prepare_endpoint_base(base: str) -> str:
    """Normalize and validate ``base`` both before and after ``resolve_url``.
    The second pass re-checks whatever ``resolve_url`` returned, so a resolved
    URL with a query or fragment is rejected just like a raw one.
    """
    checked = _validated_endpoint_base(normalize_base(base))
    resolved = resolve_url(checked)
    return _validated_endpoint_base(normalize_base(resolved))
 def _append_endpoint_path(base: str, suffix: str) -> str:
    parsed = urlparse(base)
    current = (parsed.path or "").rstrip("/")
    extra = "/" + suffix.lstrip("/")
    path = f"{current}{extra}" if current else extra
    return urlunparse(parsed._replace(path=path, query="", fragment=""))
 def _pathless_host(base: str, host: str) -> bool:
    parsed = urlparse(base)
    return (parsed.hostname or "").lower() == host and not (parsed.path or "").strip("/")
 def _anthropic_api_root(base: str) -> str:
    """Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere."""
    base = (base or "").strip().rstrip("/")
@@ -197,17 +171,15 @@ def _anthropic_api_root(base: str) -> str:
 def build_chat_url(base: str) -> str:
    """Return the correct chat endpoint URL for a given base."""
-    base = _prepare_endpoint_base(base)
+    base = resolve_url(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _append_endpoint_path(_anthropic_api_root(base), "/v1/messages")
+        return _anthropic_api_root(base) + "/v1/messages"
    if provider == "ollama":
-        return _append_endpoint_path(_ollama_api_root(base), "/chat")
+        return _ollama_api_root(base) + "/chat"
    if provider == "chatgpt-subscription":
-        return _append_endpoint_path(base, "/responses")
+        return base.rstrip("/") + "/responses"
-    if _pathless_host(base, "api.openai.com"):
+    return base + "/chat/completions"
        base = _append_endpoint_path(base, "/v1")
    return _append_endpoint_path(base, "/chat/completions")
 def build_models_url(base: str) -> Optional[str]:
@@ -221,12 +193,12 @@ def build_models_url(base: str) -> Optional[str]:
    untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
    their semantics).
    """
-    base = _prepare_endpoint_base(base)
+    base = normalize_base(resolve_url(base))
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _append_endpoint_path(_anthropic_api_root(base), "/v1/models")
+        return _anthropic_api_root(base) + "/v1/models"
    if provider == "ollama":
-        return _append_endpoint_path(_ollama_api_root(base), "/tags")
+        return _ollama_api_root(base) + "/tags"
    if provider == "chatgpt-subscription":
        return None
    # Generic OpenAI-compatible fallback: local model servers with no explicit
@@ -236,10 +208,10 @@ def build_models_url(base: str) -> Optional[str]:
    parsed = urlparse(base)
    host = (parsed.hostname or "").lower()
    is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
-    uses_v1_models_by_default = is_local or host in {"api.deepseek.com", "api.openai.com"}
+    uses_v1_models_by_default = is_local or host in {"api.deepseek.com"}
    if not parsed.path and uses_v1_models_by_default:
-        base = _append_endpoint_path(base, "/v1")
+        base = base + "/v1"
-    return _append_endpoint_path(base, "/models")
+    return base + "/models"
 def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
@@ -424,9 +396,6 @@ def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
        settings = load_settings()
        utility_ep = (get_user_setting("utility_endpoint_id", owner or "", settings.get("utility_endpoint_id", "")) or "").strip()
        if not utility_ep:
            utility_chain = get_user_setting("utility_model_fallbacks", owner or "", settings.get("utility_model_fallbacks") or []) or []
            if utility_chain:
                return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
            return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
    except Exception:
        pass
@@ -4,7 +4,6 @@ import uuid
 import logging
 import re
 from typing import Dict, List, Optional, Any
 from urllib.parse import urljoin, urlparse, urlunparse
 import httpx
 from fastapi import HTTPException
@@ -203,22 +202,6 @@ def mask_integration_secret(integration: Dict[str, Any]) -> Dict[str, Any]:
    return safe
 def _normalize_integration_base_url(base_url: Any) -> str:
    if not isinstance(base_url, str) or not base_url.strip():
        raise ValueError("Integration base URL is required")
    cleaned = base_url.strip().rstrip("/")
    if "?" in cleaned or "#" in cleaned:
        raise ValueError("Integration base URL must not include query or fragment")
    parsed = urlparse(cleaned)
    if parsed.scheme.lower() not in ("http", "https") or not parsed.hostname:
        raise ValueError("Integration base URL must be an HTTP(S) URL")
    return urlunparse(parsed._replace(scheme=parsed.scheme.lower(), query="", fragment="")).rstrip("/")
 def _join_integration_url(base_url: str, path: str) -> str:
    return urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
 def load_integrations() -> List[Dict[str, Any]]:
    """Load all integrations from disk with secrets decrypted for runtime use."""
    if not os.path.exists(DATA_FILE):
@@ -278,10 +261,8 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
    if not isinstance(integration.get("name"), str) or not integration["name"].strip():
        raise HTTPException(400, "Integration name is required")
-    try:
+    if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
-        integration["base_url"] = _normalize_integration_base_url(integration.get("base_url"))
+        raise HTTPException(400, "Integration base URL is required")
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc
    integrations = load_integrations()
    integrations.append(integration)
@@ -291,14 +272,10 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
 def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Update fields on an existing integration. Returns updated integration or None."""
    data = dict(data)
    if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
        raise HTTPException(400, "Integration name is required")
-    if "base_url" in data:
+    if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
-        try:
+        raise HTTPException(400, "Integration base URL is required")
            data["base_url"] = _normalize_integration_base_url(data["base_url"])
        except ValueError as exc:
            raise HTTPException(400, str(exc)) from exc
    integrations = load_integrations()
    for item in integrations:
@@ -364,10 +341,9 @@ async def execute_api_call(
    if not integration.get("enabled", True):
        return {"error": f"Integration '{integration.get('name')}' is disabled", "exit_code": 1}
-    try:
+    base_url = integration.get("base_url", "").rstrip("/")
-        base_url = _normalize_integration_base_url(integration.get("base_url", ""))
+    if not base_url:
-    except ValueError as exc:
+        return {"error": "Integration has no base_url configured", "exit_code": 1}
        return {"error": str(exc), "exit_code": 1}
    # Strip common API path suffixes users might accidentally include
    # (e.g. "http://host/v1/" → "http://host"). The integration's preset
@@ -390,10 +366,7 @@ async def execute_api_call(
    if re.search(r"^https?://", path) or "://" in path:
        return {"error": "Path must not contain a protocol scheme", "exit_code": 1}
-    if "#" in path:
+    url = base_url + path
        return {"error": "Path must not contain a fragment", "exit_code": 1}
    url = _join_integration_url(base_url, path)
    method = method.upper()
    # Build headers
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}
 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|commentary|final)"
+    r"<\|channel\|>(analysis|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,7 +96,6 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -146,10 +145,7 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            # analysis + commentary (tool-call preambles / function-arg bodies)
+            out.append((text, self._channel == "analysis"))
            # are internal, not user-facing — route them to thinking so they
            # don't leak into the visible answer; only `final` is visible.
            out.append((text, self._channel in ("analysis", "commentary")))
    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)
@@ -287,8 +283,7 @@ def _is_ollama_native_url(url: str) -> bool:
    """Return True for native Ollama API URLs, including Ollama Cloud."""
    try:
        parsed = urlparse(url or "")
-    except Exception as e:
+    except Exception:
        logger.warning("Failed to parse URL for Ollama detection", exc_info=e)
        return False
    host = parsed.hostname or ""
    path = (parsed.path or "").rstrip("/")
@@ -907,10 +902,7 @@ def _anthropic_rejects_temperature(model: str) -> bool:
    return (int(match.group(1)), int(match.group(2))) >= (4, 7)
 # Models that support structured thinking — may output </think> without opening tag
-_THINKING_MODEL_PATTERNS = (
+_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
    "qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax",
    "m2-reap", "gemma", "stepfun", "step-3", "step3",
 )
 def _supports_thinking(model: str) -> bool:
    """Check if model supports structured thinking output."""
@@ -1353,8 +1345,8 @@ def list_model_ids(
                r = httpx.get(root + "/api/tags", timeout=timeout)
                r.raise_for_status()
                return [m.get("name") or m.get("model") for m in (r.json().get("models") or []) if m.get("name") or m.get("model")]
-        except Exception as e:
+        except Exception:
-            logger.warning("Failed to fetch model list from configured endpoint", exc_info=e)
+            pass
        return []
 def normalize_model_id(
@@ -2138,8 +2130,6 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                            yield _stream_delta_event(reasoning, thinking=True)
                                        content = delta.get("content") or ""
                                        if content:
                                            content = re.sub(r"<mm:think(\s+[^>]*)?>", r"<think\1>", content, flags=re.IGNORECASE)
                                            content = re.sub(r"</mm:think>", "</think>", content, flags=re.IGNORECASE)
                                            stripped = content.lstrip()
                                            # gpt-oss harmony format (<|channel|>analysis/final): route via the harmony
                                            # stream router. Sticky once the first marker appears — distinct from the
@@ -11,8 +11,6 @@ import os
 import re
 from typing import Any, Dict, List, Optional, Set, Tuple
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
 def _format_mcp_connection_error(name: str, command: str = "", args: Optional[List[str]] = None, error: Exception = None) -> str:
@@ -510,7 +508,7 @@ class McpManager:
            return False
        script_rel, name = _BUILTIN_SERVERS[server_id]
-        base_dir = get_app_root()
+        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        script_path = os.path.join(base_dir, script_rel)
        # Clean up old connection
@@ -17,11 +17,10 @@ import httpx
 logger = logging.getLogger(__name__)
 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
-_PRIVATE_NETWORKS = (
+_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
-    ipaddress.ip_network("10.0.0.0/8"),
+                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
-    ipaddress.ip_network("172.16.0.0/12"),
+                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
-    ipaddress.ip_network("192.168.0.0/16"),
+                     "172.30.", "172.31.", "192.168.")
 )
 # Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
 # A bare "100." prefix would classify public addresses (e.g. AWS ranges
@@ -37,14 +36,6 @@ def _in_tailscale_range(host: str) -> bool:
        return False
 def _is_private_ip_literal(host: str) -> bool:
    """Report whether ``host`` is an IP literal inside one of ``_PRIVATE_NETWORKS``.
    Anything that does not parse as an IP address (for example a DNS name)
    yields ``False``.
    """
    try:
        address = ipaddress.ip_address(host)
    except ValueError:
        return False
    for network in _PRIVATE_NETWORKS:
        if address in network:
            return True
    return False
 def _normalize_base_for_compare(url: str) -> str:
    url = (url or "").strip().rstrip("/")
    for suffix in ("/chat/completions", "/models", "/completions", "/v1/messages"):
@@ -96,7 +87,7 @@ def is_local_endpoint(url: str) -> bool:
        return True
    try:
        host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or _is_private_ip_literal(host) or _in_tailscale_range(host)
+        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
    except Exception:
        return False
@@ -322,47 +322,6 @@ class PersonalDocsManager:
        else:
            logger.info(f"Directory not in index: {directory}")
    def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
        """Re-point tracked directories and excluded files from an old root to a new one.
        ``path_map`` supplies exact old-path -> new-path overrides that take
        precedence over the prefix rewrite. Each list is saved only when one of
        its entries changed, and the index is refreshed when either list changed.
        """
        source_root = os.path.abspath(old_directory)
        target_root = os.path.abspath(new_directory)
        overrides = {
            os.path.abspath(src): os.path.abspath(dst)
            for src, dst in (path_map or {}).items()
        }
        def relocate(path: str) -> str:
            absolute = os.path.abspath(path)
            override = overrides.get(absolute)
            if override:
                return override
            if absolute == source_root or absolute.startswith(source_root + os.sep):
                # Graft whatever follows the old root onto the new root; for an
                # exact match the remainder is empty.
                return target_root + absolute[len(source_root):]
            return absolute
        # Pair every stored path with its rewritten form so "did anything
        # change" and "what is the new value" come from a single pass.
        dir_pairs = [(os.path.abspath(d), relocate(d)) for d in self.indexed_directories]
        dirs_changed = any(before != after for before, after in dir_pairs)
        if dirs_changed:
            # dict.fromkeys drops duplicates while keeping first-seen order.
            self.indexed_directories = list(dict.fromkeys(after for _, after in dir_pairs))
            self.save_directories()
        excluded_pairs = [(os.path.abspath(p), relocate(p)) for p in self.excluded_files]
        excluded_changed = any(before != after for before, after in excluded_pairs)
        if excluded_changed:
            self.excluded_files = {after for _, after in excluded_pairs}
            self._save_excluded()
        if dirs_changed or excluded_changed:
            self.refresh_index()
    def get_indexed_directories(self):
        """Get the list of all indexed directories."""
        return self.indexed_directories.copy()
@@ -7,7 +7,6 @@ import time
 from pathlib import Path
 from src.constants import RAG_DIR
 from src.runtime_paths import get_app_root
 logger = logging.getLogger(__name__)
@@ -50,23 +50,6 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
    return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"
 def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
    if not isinstance(value, str) or not value:
        return value
    abs_value = os.path.abspath(value)
    mapped = path_map.get(abs_value)
    if mapped:
        return mapped
    for old_prefix, new_prefix in path_prefixes:
        old_abs = os.path.abspath(old_prefix)
        new_abs = os.path.abspath(new_prefix)
        if abs_value == old_abs:
            return new_abs
        if abs_value.startswith(old_abs + os.sep):
            return new_abs + abs_value[len(old_abs):]
    return value
 class VectorRAG:
    """RAG system using ChromaDB vector storage with hybrid search."""
@@ -267,75 +250,6 @@ class VectorRAG:
            "failed_count": len(docs) - len(valid),
        }
    def rename_owner(
        self,
        old_owner: str,
        new_owner: str,
        *,
        path_map: Optional[Dict[str, str]] = None,
        path_prefixes: Optional[List[tuple]] = None,
    ) -> Dict[str, Any]:
        """Rewrite existing RAG metadata after an auth username rename.
        Owner names are compared stripped and lower-cased. For every collection
        returned by ``_collections_for_delete()``, chunks whose ``owner``
        metadata equals ``old_owner`` get the new owner, and their ``source`` /
        ``directory`` paths are rewritten via ``path_map`` and ``path_prefixes``.
        Returns a dict with ``success``, ``updated_count`` and ``message``. Once
        the scan runs it also carries ``failed_count``: one per lane whose scan
        failed plus one per chunk whose update failed. The two early returns
        omit ``failed_count``.
        """
        if not self.healthy:
            return {"success": False, "updated_count": 0, "message": "Collection not initialized"}
        old_owner = (old_owner or "").strip().lower()
        new_owner = (new_owner or "").strip().lower()
        if not old_owner or not new_owner or old_owner == new_owner:
            return {"success": True, "updated_count": 0, "message": "No owner rename needed"}
        path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
        path_prefixes = path_prefixes or []
        updated_ids = set()
        failed_count = 0
        for lane_name, collection in self._collections_for_delete():
            try:
                results = collection.get(
                    where={"owner": old_owner},
                    include=["metadatas"],
                )
            except Exception as e:
                # Best effort: a lane that cannot be scanned is counted as one
                # failure and the remaining lanes are still processed.
                logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
                failed_count += 1
                continue
            ids = results.get("ids") or []
            metadatas = results.get("metadatas") or []
            if not ids:
                continue
            new_metas = []
            selected_ids = []
            for doc_id, meta in zip(ids, metadatas):
                if not isinstance(meta, dict):
                    continue
                next_meta = dict(meta)
                if str(next_meta.get("owner", "")).strip().lower() == old_owner:
                    next_meta["owner"] = new_owner
                for key in ("source", "directory"):
                    # Only rewrite keys the chunk already has. Assigning
                    # unconditionally would add ``key: None`` to chunks that
                    # never had the field, changing their metadata shape.
                    # NOTE(review): some ChromaDB versions appear to reject
                    # None metadata values outright - confirm for the pinned
                    # version.
                    if key in next_meta:
                        next_meta[key] = _rewrite_owner_path(next_meta[key], path_map, path_prefixes)
                selected_ids.append(doc_id)
                new_metas.append(next_meta)
            if not selected_ids:
                continue
            try:
                collection.update(ids=selected_ids, metadatas=new_metas)
                updated_ids.update(selected_ids)
            except Exception as e:
                # The whole batch for this lane is counted as failed.
                logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
                failed_count += len(selected_ids)
        success = failed_count == 0
        return {
            "success": success,
            "updated_count": len(updated_ids),
            "failed_count": failed_count,
            "message": f"Updated {len(updated_ids)} RAG chunk(s)",
        }
    # ------------------------------------------------------------------
    # Search — hybrid: vector similarity + keyword overlap
    # ------------------------------------------------------------------
@@ -1,30 +0,0 @@
 """Helpers for resolving runtime paths in source and frozen builds."""
 import os
 import sys
 def get_app_root() -> str:
    """Locate the directory that holds the application's runtime folders.
    Source runs resolve to the directory two levels above this module, i.e.
    the repository root. Frozen builds (``sys.frozen`` set) resolve to
    ``sys._MEIPASS`` - PyInstaller's internal content directory - so bundled
    folders such as `static/`, `scripts/`, and `data/` stay with the
    executable payload; without ``_MEIPASS`` the executable's own directory
    is used.
    """
    if not getattr(sys, "frozen", False):
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.dirname(module_dir)
    executable_dir = os.path.dirname(os.path.abspath(sys.executable))
    return getattr(sys, "_MEIPASS", executable_dir)
 def get_default_data_dir() -> str:
    """Pick the data directory used when no override is configured.
    Source runs use the ``data`` folder under the app root. Frozen builds use
    ``~/.odysseus/data`` instead, so SQLite databases and other persistent
    files are not written into the temporary extraction bundle directory.
    """
    frozen = getattr(sys, "frozen", False)
    if frozen:
        home = os.path.expanduser("~")
        return os.path.join(home, ".odysseus", "data")
    return os.path.join(get_app_root(), "data")
@@ -1,11 +1,6 @@
-"""Shared resolver for background-task AI endpoints."""
+"""Shared resolver for background-task AI endpoint (auto-naming, memory, sorting)."""
-from src.endpoint_resolver import (
+from src.endpoint_resolver import resolve_endpoint
    resolve_chat_fallback_candidates,
    resolve_endpoint,
    resolve_utility_fallback_candidates,
 )
 from src.llm_core import llm_call_async_with_fallback
 def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
@@ -16,60 +11,3 @@ def resolve_task_endpoint(fallback_url=None, fallback_model=None, fallback_heade
    endpoint cannot be resolved.
    """
    return resolve_endpoint("task", fallback_url, fallback_model, fallback_headers, owner=owner)
 def resolve_task_candidates(fallback_url=None, fallback_model=None, fallback_headers=None, owner=None):
    """Build the ordered, de-duplicated list of LLM candidates for a background task.
    Candidates are ``(url, model, headers)`` tuples, tried in this order:
    1. configured Background Tasks endpoint/model, or caller fallback
    2. Utility endpoint/model
    3. Default endpoint/model
    4. Utility fallback chain
    5. Default fallback chain
    Entries lacking a URL or model are skipped, as are repeats of an
    already-listed ``(url, model)`` pair.
    """
    ordered = []
    def _consider(url, model, headers):
        if not (url and model):
            return
        if (url, model) in [(u, m) for u, m, _ in ordered]:
            return
        ordered.append((url, model, headers or {}))
    primary = resolve_task_endpoint(fallback_url, fallback_model, fallback_headers, owner=owner)
    _consider(*primary)
    for role in ("utility", "default"):
        _consider(*resolve_endpoint(role, owner=owner))
    for chain in (resolve_utility_fallback_candidates, resolve_chat_fallback_candidates):
        for url, model, headers in chain(owner=owner):
            _consider(url, model, headers)
    return ordered
 async def task_llm_call_async(messages, *, fallback_url=None, fallback_model=None, fallback_headers=None, owner=None, **kwargs):
    """Send ``messages`` through the shared background-task candidate chain.
    The candidates come from ``resolve_task_candidates``; extra keyword
    arguments are forwarded to ``llm_call_async_with_fallback``. Raises
    ``RuntimeError`` when no candidate can be resolved.
    """
    chain = resolve_task_candidates(
        fallback_url=fallback_url,
        fallback_model=fallback_model,
        fallback_headers=fallback_headers,
        owner=owner,
    )
    if chain:
        return await llm_call_async_with_fallback(chain, messages=messages, **kwargs)
    raise RuntimeError("No LLM endpoint available for background task")
@@ -9,8 +9,6 @@ import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any, Awaitable, Callable, Dict, Tuple
 from core.auth import RESERVED_USERNAMES
 logger = logging.getLogger(__name__)
@@ -19,34 +17,6 @@ def _utcnow() -> datetime:
    return datetime.now(timezone.utc).replace(tzinfo=None)
 # Shell/file tools a scheduled task's agent should be offered by default,
 # mirroring the chat agent (where these are on unless a privilege or global
 # setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
 # never include bash/python, so on a host with an empty/degraded tool-embedding
 # index a task could not run shell or Python even for an admin owner. Offering
 # them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
 # this whole group for non-admin multi-user owners, and only admits it for
 # admins and single-user (AUTH_ENABLED=false) deployments.
 TASK_DEFAULT_SHELL_TOOLS = frozenset((
    "bash",
    "python",
    "read_file",
    "write_file",
    "edit_file",
    "grep",
    "glob",
    "ls",
    "get_workspace",
 ))
 def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
    """Build the tool set a scheduled task's agent is offered.
    The result is the default shell/file group plus the RAG-retrieved tools
    and the assistant's always-available tools, minus anything in
    ``disabled_tools`` (the crew's `enabled_tools` allowlist, inverted).
    Per-owner admin gating is applied later by stream_agent_loop
    (blocked_tools_for_owner), not here.
    """
    offered = set(TASK_DEFAULT_SHELL_TOOLS)
    offered.update(rag_tools, assistant_always)
    if disabled_tools:
        offered.difference_update(disabled_tools)
    return offered
 # ── Shared TTL cache (singleflight) ────────────────────────────────────────
 # Multiple scheduled tasks firing in the same minute often need the same
 # external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -266,29 +236,6 @@ def _digest_windows(now):
    ]
 def _checkin_calendar_events(db, owner, start, end):
    """Fetch one owner's non-cancelled calendar events starting in [start, end].
    Ownership is stored on CalendarCal.owner and events inherit it through
    calendar_id, so the query joins CalendarCal and filters on its owner.
    An unscoped query would pull EVERY user's events into one user's check-in
    (a cross-tenant leak of summaries/locations). The join mirrors
    routes/calendar_routes.list_events. Results are ordered by start time.
    """
    from core.database import CalendarCal, CalendarEvent
    query = db.query(CalendarEvent).join(
        CalendarCal, CalendarEvent.calendar_id == CalendarCal.id
    )
    query = query.filter(
        CalendarCal.owner == owner,
        CalendarEvent.dtstart >= start,
        CalendarEvent.dtstart <= end,
        CalendarEvent.status != "cancelled",
    )
    return query.order_by(CalendarEvent.dtstart).all()
 class TaskScheduler:
    def __init__(self, session_manager):
        self._session_manager = session_manager
@@ -886,14 +833,6 @@ class TaskScheduler:
                    owner=task.owner,
                    body=run.result if output == "notification" else None,
                )
            elif run.status == "error":
                self.add_notification(
                    task.name,
                    "error",
                    task_id,
                    owner=task.owner,
                    body=run.error or run.result,
                )
            # Log result to the assistant chat so all task activity is visible.
            # Skip skipped/error rows — user shouldn't see "skipped: …" noise
@@ -1188,7 +1127,11 @@ class TaskScheduler:
                    # Strip timezone for naive DB comparison
                    _s = start.replace(tzinfo=None) if start.tzinfo else start
                    _e = end.replace(tzinfo=None) if end.tzinfo else end
-                    evs = _checkin_calendar_events(_db, task.owner, _s, _e)
+                    evs = _db.query(_CE).filter(
                        _CE.dtstart >= _s,
                        _CE.dtstart <= _e,
                        _CE.status != "cancelled",
                    ).order_by(_CE.dtstart).all()
                    if not evs:
                        continue
                    # Group by importance for richer output
@@ -1427,28 +1370,15 @@ class TaskScheduler:
            time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
        system_prompt = f"Current time: {time_str}\n\n{system_prompt}"
-        # Compute the disabled-tools set: the crew's enabled_tools allowlist
+        # Compute tool filter from CrewMember.enabled_tools if set
-        # (inverted) plus the operator's global disabled_tools setting. The
+        disabled_tools = None
        # global list must be merged here — chat does the same merge before
        # entering the agent loop (routes/chat_routes.py) — otherwise an admin
        # or AUTH_ENABLED=false scheduled task would still see and call shell/
        # file tools after the operator disabled them globally, because the
        # prompt/schema/execution gates only enforce what is passed in.
        disabled_tools: set[str] = set()
        if crew and crew.enabled_tools:
            try:
                enabled = json.loads(crew.enabled_tools)
                if isinstance(enabled, list) and enabled:
                    from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
                    all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
-                    disabled_tools |= all_tools - set(enabled)
+                    disabled_tools = all_tools - set(enabled)
            except Exception:
                pass
        try:
            from src.settings import get_setting
            _global_disabled = get_setting("disabled_tools", [])
            if isinstance(_global_disabled, list):
                disabled_tools.update(_global_disabled)
            except Exception:
                pass
@@ -1460,10 +1390,10 @@ class TaskScheduler:
            tool_idx = get_tool_index()
            if tool_idx:
                rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
-                relevant_tools = compose_task_relevant_tools(
+                relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
-                    rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
+                if disabled_tools:
-                )
+                    relevant_tools -= disabled_tools
-                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
+                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
        except Exception as e:
            logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")
@@ -1471,23 +1401,17 @@ class TaskScheduler:
        try:
            result = await self._run_agent_loop(
                endpoint_url, model, task, session_id,
-                system_prompt=system_prompt, disabled_tools=disabled_tools or None,
+                system_prompt=system_prompt, disabled_tools=disabled_tools,
                relevant_tools=relevant_tools,
            )
        except Exception as e:
            logger.warning(f"Agent loop failed for task '{task.name}', falling back to simple call: {e}")
-            from src.task_endpoint import task_llm_call_async
+            from src.llm_core import llm_call_async
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": task.prompt},
            ]
-            result = await task_llm_call_async(
+            result = await llm_call_async(url=endpoint_url, model=model, messages=messages, timeout=120)
                messages,
                fallback_url=endpoint_url,
                fallback_model=model,
                owner=task.owner,
                timeout=120,
            )
        # Strip the model's chain-of-thought before saving/delivering. Task
        # output is LLM-only, so prose=True (which also removes untagged
@@ -1712,17 +1636,13 @@ class TaskScheduler:
        # Honor per-task max_steps (defense against runaway agent loops).
        # Falls back to 20 if not set — the historical default.
        _task_max_rounds = task.max_steps if task.max_steps and task.max_steps > 0 else 20
-        # Tasks are background workloads: use the shared task fallback chain
+        # Tasks are background workloads — they share the Utility model's
-        # behind the primary endpoint so a downed primary won't silently yield
+        # fallback chain (Settings → Utility Model → Fallbacks). A downed
-        # `(no output)`.
+        # primary endpoint won't silently yield `(no output)` — same recipe
        # chat uses but with the utility list (`utility_model_fallbacks`).
        try:
-            from src.task_endpoint import resolve_task_candidates
+            from src.endpoint_resolver import resolve_utility_fallback_candidates
-            _task_fallbacks = resolve_task_candidates(
+            _task_fallbacks = resolve_utility_fallback_candidates(owner=task.owner or None)
                fallback_url=endpoint_url,
                fallback_model=model,
                fallback_headers=headers,
                owner=task.owner or None,
            )[1:]
        except Exception:
            _task_fallbacks = []
        async for event_str in stream_agent_loop(
@@ -1759,22 +1679,21 @@ class TaskScheduler:
        # asking it to summarize what it did. Guarantees output.
        if not full_text.strip():
            try:
-                from src.task_endpoint import task_llm_call_async
+                from src.llm_core import llm_call_async_with_fallback
                from src.endpoint_resolver import resolve_utility_fallback_candidates
                grace_context = "You ran out of steps. "
                if tool_results:
                    grace_context += "Here's what your tools returned:\n" + "\n".join(tool_results[-5:])
                else:
                    grace_context += "No tool results were captured."
                grace_context += "\n\nSummarize what you accomplished and what's still pending. Be concise."
-                full_text = await task_llm_call_async(
+                _grace_candidates = [(endpoint_url, model, headers)] + resolve_utility_fallback_candidates(owner=task.owner or None)
                full_text = await llm_call_async_with_fallback(
                    _grace_candidates,
                    messages=[
                        {"role": "system", "content": system_content},
                        {"role": "user", "content": grace_context},
                    ],
                    fallback_url=endpoint_url,
                    fallback_model=model,
                    fallback_headers=headers,
                    owner=task.owner or None,
                    timeout=30,
                )
                full_text = (full_text or "").strip()
@@ -2283,7 +2202,7 @@ class TaskScheduler:
        # check-ins seeded, which then double-fire alongside the human user's
        # check-ins. This was the root cause of the duplicate 'Morning check-in'
        # rows we had to manually clean up.
-        if not owner or owner in RESERVED_USERNAMES:
+        if not owner or owner in {"internal-tool", "api", "demo", "system"}:
            logger.info(f"ensure_assistant_defaults: skip synthetic owner {owner!r}")
            return
        from core.database import SessionLocal, CrewMember, ScheduledTask
@@ -323,24 +323,6 @@ _MCP_TOOL_MAP = {
    "web_fetch":      ("web_fetch",  "web_fetch"),
    "generate_image": ("image_gen",  "generate_image"),
 }
 _EMAIL_MCP_OWNER_ARG = "_odysseus_owner"
 def _parse_qualified_mcp_args(tool: str, content: str) -> tuple[Dict, Optional[str]]:
    raw = (content or "").strip()
    if not raw:
        return {}, None
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        if tool.startswith("mcp__email__"):
            return {}, "Email MCP tool arguments must be a JSON object."
        return {}, None
    if not isinstance(parsed, dict):
        if tool.startswith("mcp__email__"):
            return {}, "Email MCP tool arguments must be a JSON object."
        return {}, None
    return parsed, None
 def _parse_generate_image(content: str) -> Dict:
@@ -471,8 +453,6 @@ async def _direct_fallback(
    tool: str,
    content: str,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
    session_id: Optional[str] = None,
    owner: Optional[str] = None,
 ) -> Optional[Dict]:
    _subproc_env = {
        **os.environ,
@@ -486,8 +466,6 @@ async def _direct_fallback(
        ctx = {
            "progress_cb": progress_cb,
            "subproc_env": _subproc_env,
            "session_id": session_id,
            "owner": owner,
        }
        from src.agent_tools import TOOL_HANDLERS
@@ -735,13 +713,10 @@ async def _execute_tool_block_impl(
            desc = f"bash (background): {short}"
            result = {
                "output": (
-                    f"Started background job `{rec['id']}`. It is running detached; "
+                    f"Started background job `{rec['id']}`. It is running detached — "
                    f"do NOT wait for it or poll it. You will be automatically re-invoked "
                    f"with its full output when it finishes. Continue with other work, or "
-                    f"end your turn now and resume when the result arrives. If the user "
+                    f"end your turn now and resume when the result arrives."
                    f"later asks to check progress or stop it, call the manage_bg_jobs "
                    f"tool yourself (output or kill); do not tell them to run a tool "
                    f"command, and do not surface raw tool syntax in your reply."
                ),
                "exit_code": 0,
                "bg_job_id": rec["id"],
@@ -762,11 +737,6 @@ async def _execute_tool_block_impl(
        desc = f"{tool}: {first_line}"
        result = await _direct_fallback(tool, content, progress_cb=progress_cb) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool == "manage_bg_jobs":
        # Inspect/kill detached `bash` jobs; needs session_id to scope to chat.
        desc = f"manage_bg_jobs: {content.split(chr(10))[0][:80]}"
        result = await _direct_fallback(tool, content, session_id=session_id, owner=owner) \
            or {"error": "manage_bg_jobs: execution failed", "exit_code": 1}
    elif tool in ("create_document", "update_document", "edit_document",
                  "suggest_document", "manage_documents"):
        desc = f"{tool}: {content.split(chr(10))[0][:80]}"
@@ -778,24 +748,10 @@ async def _execute_tool_block_impl(
        query = content.split("\n")[0].strip()
        desc = f"search_chats: {query[:80]}"
        result = await do_search_chats(query, owner=owner)
-    elif tool in ("chat_with_model", "ask_teacher", "list_models"):
+    elif tool in ("chat_with_model", "create_session", "list_sessions",
-        # Migrated to the agent_tools registry (#3629): dispatched through
+                  "send_to_session", "pipeline",
-        # TOOL_HANDLERS with the owner/session ctx these tools need, instead
+                  "manage_session", "manage_memory", "list_models",
-        # of the legacy dispatch_ai_tool elif. The impls live in
+                  "ui_control", "ask_teacher"):
        # src/agent_tools/model_interaction_tools.py.
        first_line = content.split(chr(10))[0].strip()[:60]
        desc = f"{tool}: {first_line}" if first_line else tool
        result = await _document_tool_dispatch(tool, content, session_id, owner) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool in ("create_session", "list_sessions", "send_to_session", "manage_session"):
        # Migrated to the agent_tools registry (#3629): dispatched through
        # TOOL_HANDLERS with the owner/session ctx these tools need. The impls
        # live in src/agent_tools/session_tools.py.
        first_line = content.split(chr(10))[0].strip()[:60]
        desc = f"{tool}: {first_line}" if first_line else tool
        result = await _document_tool_dispatch(tool, content, session_id, owner) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool in ("pipeline", "manage_memory", "ui_control"):
        from src.ai_interaction import dispatch_ai_tool
        desc, result = await dispatch_ai_tool(tool, content, session_id, owner=owner)
    elif tool == "manage_tasks":
@@ -902,14 +858,11 @@ async def _execute_tool_block_impl(
        # MCP tool dispatch
        mcp = get_mcp_manager()
        if mcp:
            try:
                args = json.loads(content) if content.strip().startswith("{") else {}
            except (json.JSONDecodeError, TypeError):
                args = {}
            desc = f"mcp: {tool}"
            args, parse_error = _parse_qualified_mcp_args(tool, content)
            if parse_error:
                result = {"error": parse_error, "exit_code": 1}
            else:
                if tool.startswith("mcp__email__") and owner:
                    args = dict(args)
                    args[_EMAIL_MCP_OWNER_ARG] = owner
            result = await mcp.call_tool(tool, args)
        else:
            desc = f"mcp: {tool}"
@@ -12,24 +12,12 @@ import os
 import re
 from typing import Any, Dict, List, Optional
 from fastapi import HTTPException
 from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
 from src.tool_utils import get_mcp_manager
 from core.constants import internal_api_base
 from routes._validators import validate_remote_host, validate_ssh_port
 logger = logging.getLogger(__name__)
 def _string_arg(value: Any) -> str:
    return "" if value is None else str(value).strip()
 def _validate_cookbook_ssh_target(remote_host: Any, ssh_port: Any = "") -> tuple[str, str]:
    """Normalize and validate an SSH host/port pair for cookbook commands.

    Each value is coerced with ``_string_arg``; an empty result is handed to
    the validator as ``None``. A falsy validator result is returned as ``""``.
    Returns ``(remote_host, ssh_port)``.

    NOTE(review): the visible callers wrap this in ``except HTTPException``,
    so the validators presumably raise that on bad input — confirm against
    ``routes._validators``.
    """
    # Host is coerced and validated before the port is touched.
    host_text = _string_arg(remote_host)
    checked_host = validate_remote_host(host_text if host_text else None)
    port_text = _string_arg(ssh_port)
    checked_port = validate_ssh_port(port_text if port_text else None)
    return checked_host or "", checked_port or ""
 # ---------------------------------------------------------------------------
 # Active email state
 # ---------------------------------------------------------------------------
@@ -657,137 +645,6 @@ async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict
 # MCP server management tool
 # ---------------------------------------------------------------------------
 # Parallel to routes/cookbook_helpers._validate_serve_cmd but deliberately the
 # opposite policy: that gate guards an admin-only serve command and allows
 # interpreters (python3/etc) because model-serving needs them, whereas this is
 # the model/prompt-injection-reachable manage_mcp path, so interpreters and
 # runners are denied here.
 #
 # Commands that can execute arbitrary code regardless of their arguments. These
 # are NEVER accepted on the manage_mcp agent path, even if an operator lists one
 # in ODYSSEUS_MCP_ALLOWED_COMMANDS -- a stdio server that genuinely needs an
 # interpreter or package runner must be registered via the trusted admin route.
 _MCP_DENIED_COMMANDS = frozenset({
    "sh", "bash", "zsh", "fish", "dash", "ksh", "csh", "tcsh", "ash", "busybox",
    "cmd", "command.com", "powershell", "pwsh",
    "python", "pypy", "node", "nodejs", "deno", "bun", "ruby", "jruby",
    "perl", "raku", "php", "lua", "luajit", "tclsh", "wish", "expect", "rscript",
    "groovy", "scala", "elixir", "erl", "iex", "java", "javac", "jshell", "jbang",
    "kotlin", "kotlinc", "dotnet", "mono", "swift", "osascript", "tsx", "ts-node",
    "npx", "bunx", "uvx", "pipx", "npm", "pnpm", "yarn", "pip", "uv",
    "gem", "cargo", "go", "bundle", "poetry", "conda", "mamba", "brew",
    "apt", "apt-get", "yum", "dnf", "pacman", "apk",
    "env", "xargs", "nohup", "setsid", "nice", "ionice", "time", "timeout",
    "watch", "stdbuf", "unbuffer", "script", "ssh", "scp", "sshpass", "sudo",
    "doas", "su", "make", "cmake", "docker", "podman", "kubectl", "find",
    "awk", "gawk", "sed", "vi", "vim", "nvim", "emacs", "ed", "tee", "eval",
 })
 # Argv flags that make even an allowlisted binary execute inline code. Matched
 # by prefix so glued forms (-cimport os, --eval=...) are caught, not just the
 # exact-token form.
 _MCP_CODE_EXEC_SHORT_FLAGS = ("-c", "-e", "-m")
 _MCP_CODE_EXEC_LONG_FLAGS = ("--eval", "--exec", "--print", "--module", "--command", "--require")
 # URL/URI scheme prefixes. _validate_mcp_command rejects any argument whose
 # lower-cased, stripped form starts with one of these.
 _MCP_URL_SCHEMES = ("http://", "https://", "ftp://", "ftps://", "file://", "data:", "jar:", "blob:")
 # Shell metacharacters refused in command/args. Args are passed as an argv list
 # (no shell), but refusing these keeps the surface narrow and obvious.
 _MCP_SHELL_METACHARS = set(";|&$`><\n\r")
 # Env vars that let a child process load attacker-supplied code before main().
 _MCP_DANGEROUS_ENV = frozenset({
    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH", "PYTHONPATH", "PYTHONSTARTUP",
    "PYTHONHOME", "PYTHONEXECUTABLE", "NODE_OPTIONS", "NODE_PATH", "BASH_ENV",
    "ENV", "SHELLOPTS", "PERL5LIB", "PERL5OPT", "RUBYOPT", "RUBYLIB", "GEM_PATH",
    "R_PROFILE", "R_HOME", "PATH", "IFS", "PROMPT_COMMAND",
 })
 def _mcp_allowed_commands() -> set:
    """Operator-configured allowlist of safe MCP launcher basenames for the agent
    path. Empty by default; set ODYSSEUS_MCP_ALLOWED_COMMANDS (comma-separated)
    to opt specific trusted binaries in. Denied commands are rejected even if
    listed here."""
    raw = os.environ.get("ODYSSEUS_MCP_ALLOWED_COMMANDS", "")
    return {c.strip().lower() for c in raw.split(",") if c.strip()}
 def _validate_mcp_command(command, args, env) -> Optional[str]:
    """Validate a model-supplied stdio MCP registration.

    Closes the RCE where manage_mcp 'add' passed prompt-injection-controlled
    command/args/env straight to a subprocess spawn (issue #438): a payload
    smuggled into a skill description, memory entry, fetched page, or email body
    could register a stdio server running arbitrary code as the app UID.

    Checks run in a fixed order and the first failure decides the message:
    command shape, denylist, operator allowlist, each argument, then env keys.

    Args:
        command: Launcher name. Must be a non-empty string containing no path
            separator and no shell metacharacter.
        args: Argument list, or a JSON-encoded list given as a string. ``None``
            skips the argument checks.
        env: Environment mapping, or a JSON-encoded object given as a string.
            A falsy value skips the env checks.

    Returns:
        A human-readable rejection reason, or ``None`` when every check passes.
    """
    if not isinstance(command, str) or not command.strip():
        return "command must be a non-empty string"
    command = command.strip()
    # Bare executable names only: both POSIX and Windows separators are refused.
    if "/" in command or "\\" in command:
        return "command must be a bare executable name, not a path"
    if any(ch in _MCP_SHELL_METACHARS for ch in command):
        return "command contains shell metacharacters"
    base = command.lower()
    # Drop a Windows executable extension before the denylist/allowlist lookups.
    if base.endswith(".exe") or base.endswith(".cmd") or base.endswith(".bat"):
        base = base.rsplit(".", 1)[0]
    # Canonicalize a trailing version suffix so versioned aliases collapse to the
    # family name (python3.11 -> python, node18 -> node, pip3 -> pip); both the
    # raw basename and the canonical form are denied, so an operator cannot
    # accidentally allowlist a runtime alias back into the path.
    canon = re.sub(r"[-_.]?\d+(?:\.\d+)*$", "", base)
    if base in _MCP_DENIED_COMMANDS or canon in _MCP_DENIED_COMMANDS:
        return (
            f"command '{command}' is not allowed on the agent MCP path: "
            "interpreters, runtimes, package runners, and shells can execute "
            "arbitrary code. Register such a server via the admin route instead."
        )
    # The allowlist is matched on `base` (extension stripped), not on the
    # version-stripped `canon` form.
    if base not in _mcp_allowed_commands():
        return (
            f"command '{command}' is not in the MCP allowlist. Add it to "
            "ODYSSEUS_MCP_ALLOWED_COMMANDS if you trust it, or register the "
            "server via the admin route."
        )
    if args is not None:
        # Tolerate a JSON-encoded list passed as a string.
        if isinstance(args, str):
            try:
                args = json.loads(args)
            except Exception:
                return "args must be a JSON list"
        if not isinstance(args, list):
            return "args must be a list"
        for a in args:
            if not isinstance(a, str):
                return "args must all be strings"
            s = a.strip()
            low = s.lower()
            # Short flags are compared case-sensitively; any arg that merely
            # starts with one of them (glued form, e.g. "-cimport os") is refused.
            if any(s == f or s.startswith(f) for f in _MCP_CODE_EXEC_SHORT_FLAGS):
                return f"arg '{a}' is a code-execution flag and is not allowed"
            # Long flags are compared lower-cased: exact match or "--flag=value".
            if any(low == f or low.startswith(f + "=") for f in _MCP_CODE_EXEC_LONG_FLAGS):
                return f"arg '{a}' is a code-execution flag and is not allowed"
            if any(low.startswith(u) for u in _MCP_URL_SCHEMES):
                return f"arg '{a}' is a remote URL and is not allowed"
            # Checked on the unstripped arg, so a leading/trailing newline or
            # carriage return is rejected too.
            if any(ch in _MCP_SHELL_METACHARS for ch in a):
                return f"arg '{a}' contains shell metacharacters"
    if env:
        # Tolerate a JSON-encoded object passed as a string.
        if isinstance(env, str):
            try:
                env = json.loads(env)
            except Exception:
                return "env must be a JSON object"
        if not isinstance(env, dict):
            return "env must be an object"
        # Only the keys are inspected; the comparison ignores case and
        # surrounding whitespace.
        for k in env:
            if str(k).strip().upper() in _MCP_DANGEROUS_ENV:
                return f"env var '{k}' can inject code into the child process and is not allowed"
    return None
 async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
    """Manage MCP servers: list, add, delete, enable, disable, reconnect."""
    try:
@@ -827,12 +684,6 @@ async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
        env = args.get("env", {})
        if not name or not command:
            return {"error": "name and command are required", "exit_code": 1}
        # Validate BEFORE any DB write or spawn: a rejected registration must
        # leave no enabled row (which would otherwise auto-reconnect on restart)
        # and must not attempt a connection.
        _mcp_err = _validate_mcp_command(command, cmd_args, env)
        if _mcp_err:
            return {"error": f"manage_mcp: refused unsafe server registration: {_mcp_err}", "exit_code": 1}
        sid = str(_uuid.uuid4())[:8]
        db = SessionLocal()
        try:
@@ -1268,8 +1119,8 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
            _ALIASES = {
                "shell": ["bash"],
                "terminal": ["bash"],
-                "search": ["web_search", "web_fetch"],
+                "search": ["web_search"],
-                "web": ["web_search", "web_fetch"],
+                "web": ["web_search"],
                "browser": ["builtin_browser"],
                "documents": ["create_document", "edit_document", "update_document", "suggest_document"],
                "doc": ["create_document", "edit_document", "update_document", "suggest_document"],
@@ -1281,7 +1132,7 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
                "notes": ["manage_notes"],
                "calendar": ["manage_calendar"],
                "email": ["mcp__email__list_emails", "mcp__email__read_email", "mcp__email__send_email"],
-                "research": ["web_search", "web_fetch"],  # research is a per-request flag, not a tool — closest analog
+                "research": ["web_search"],  # research is a per-request flag, not a tool — closest analog
            }
            if action == "list_tools":
@@ -2863,25 +2714,13 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
                endpoint_added=endpoint_added, endpoint_id=endpoint_id or "",
            )
            note = "" if registered else " (state-write failed — task may not show in UI)"
            where = host or "local"
            log_path = f"/tmp/odysseus-tmux/{sid}.log"
            return {
-                "output": (
+                "output": f"Serving {repo_id} (session: {sid}){note}",
                    f"Serving {repo_id} on {where} (session: {sid}){note}\n"
                    f"Next required check: call list_served_models. If this task is not ready, "
                    f"call tail_serve_output with session_id={sid} and tail=400 before answering. "
                    f"Do not tell the user to check logs; you have the log tool."
                ),
                "session_id": sid,
                "task_type": "serve",
                "phase": "running",
                "host": host,
                "endpoint_id": endpoint_id,
                "log_path": log_path,
                "next_tools": [
                    {"name": "list_served_models", "arguments": {}},
                    {"name": "tail_serve_output", "arguments": {"session_id": sid, "tail": 400}},
                ],
                "exit_code": 0,
            }
        # FastAPI HTTPException puts the message under `detail`, not `error`.
@@ -3049,10 +2888,6 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "",
            break
    if remote:
        try:
            remote, sport = _validate_cookbook_ssh_target(remote, sport)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
        _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
        cmd = (
            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
@@ -3141,8 +2976,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        tail = 400
    tail = max(20, min(tail, 4000))
    headers = _internal_headers()
-    remote = _string_arg(args.get("remote_host") or args.get("host"))
+    remote = (args.get("remote_host") or args.get("host") or "").strip()
-    sport = _string_arg(args.get("ssh_port"))
+    sport = (args.get("ssh_port") or "").strip()
    # Resolve host from cookbook state if caller didn't pass one — same
    # lookup _cookbook_kill_session uses.
    if not remote:
@@ -3160,12 +2995,6 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
                    if not sport:
                        sport = t.get("sshPort") or ""
                    break
    if remote:
        try:
            remote, sport = _validate_cookbook_ssh_target(remote, sport)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
    # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
    # live tmux pane. The pane is what the user would see scrolling on
    # their screen — including the post-crash neofetch banner and the
@@ -3228,17 +3057,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        MAX_CHARS = 8000
        if len(output_text) > MAX_CHARS:
            output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
        if not output_text:
            output_text = (
                f"No log output captured yet for {session_id} on {host_label}. "
                "This usually means the tmux wrapper has started but the model process "
                "has not printed anything yet. Do not stop here: call list_served_models "
                "again to check whether it is still loading, ready, or crashed; if it is "
                "still not ready, call tail_serve_output again with a larger tail after "
                "the next status check."
            )
        return {
-            "output": output_text,
+            "output": output_text or "(empty pane)",
            "session_id": session_id,
            "host": host_label,
            "tail_lines": tail,
@@ -3352,7 +3172,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
-    host = _string_arg(args.get("host") or args.get("remote_host"))
+    host = (args.get("host") or args.get("remote_host") or "").strip()
    sess = (args.get("tmux_session") or args.get("session_id") or "").strip()
    model = (args.get("model") or args.get("repo_id") or "").strip()
    port = args.get("port") or 8000
@@ -3363,12 +3183,6 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
        return {"error": "tmux_session and model are required", "exit_code": 1}
    # Verify tmux session exists on the target host
    if host:
        try:
            host, _ = _validate_cookbook_ssh_target(host)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
    headers = _internal_headers()
    if host:
        check = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)} 2>&1'"
@@ -3983,7 +3797,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
    if not name:
        return {"error": "name is required", "exit_code": 1}
-    contacts = {}  # email_or_phone -> {name, source, phone?}
+    contacts = {}  # email -> {name, source}
    # 1. CardDAV (Radicale) — structured contacts. Call in-process: a
    # server-side httpx GET to /api/contacts/search carries no session
@@ -3998,18 +3812,10 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
            match = q in hay_name or any(q in (e or "").lower() for e in c.get("emails", []))
            if not match:
                continue
            has_email = False
            for email in (c.get("emails") or []):
                email = (email or "").strip().lower()
                if email and "@" in email:
                    contacts[email] = {"name": c.get("name") or email, "source": "contacts"}
                    has_email = True
            # Fall back to phone numbers when the contact has no email address
            if not has_email:
                for phone in (c.get("phones") or []):
                    phone = (phone or "").strip()
                    if phone:
                        contacts[phone] = {"name": c.get("name") or phone, "source": "contacts", "phone": phone}
    except Exception:
        pass
@@ -4029,11 +3835,8 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict:
        return {"output": f"No contacts found matching '{name}'.", "exit_code": 0}
    lines = [f"Contacts matching '{name}':"]
-    for key, info in contacts.items():
+    for email, info in contacts.items():
-        if info.get("phone"):
+        lines.append(f"- {info['name']} <{email}> ({info['source']})")
            lines.append(f"- {info['name']} — phone: {info['phone']} ({info['source']})")
        else:
            lines.append(f"- {info['name']} <{key}> ({info['source']})")
    return {"output": "\n".join(lines), "exit_code": 0}
@@ -94,7 +94,6 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
    "manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
    "manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
    "api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
    "manage_tokens": "API token management: list, create, or delete API access tokens.",
    "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
    "manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
@@ -135,7 +134,6 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter=<keyword> lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.",
    "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.",
    "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.",
    "manage_bg_jobs": "Inspect and control detached background `bash` jobs (the ones started with a `#!bg` marker). action='list' shows this chat's jobs (id/status/age/command); action='output' returns a job's captured output so far (check on a long-running job, or re-read a finished one); action='kill' stops a runaway job by id. Use for 'is the background job done', 'check on that job', 'show the build output', 'kill the background job', 'stop the bg task'. output/kill need a job_id from list.",
 }
@@ -350,12 +348,6 @@ class ToolIndex:
            {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"},
        frozenset({"calendar", "event", "meeting", "schedule", "appointment"}):
            {"manage_calendar"},
        # Detached background `bash` jobs (#!bg): check on / read output / kill.
        frozenset({"background job", "background jobs", "bg job", "bg jobs",
                   "background task", "is the job done", "check the job",
                   "check on that job", "job output", "kill the job",
                   "kill the background", "stop the background", "running job"}):
            {"manage_bg_jobs"},
        frozenset({"note", "todo", "reminder", "remind", "checklist", "remember to"}):
            {"manage_notes"},
        # Chat/session management. "rename" alone maps to documents below, so a
@@ -422,14 +414,6 @@ class ToolIndex:
                   "my settings", "change setting", "change a setting", "set setting",
                   "preference", "preferences", "configure"}):
            {"manage_settings", "ui_control"},
        # API-integration intent → the api_call tool. Mirrors the agent-loop
        # "integrations" domain so api_call still surfaces on the retrieval and
        # keyword-fallback paths (not just the deterministic domain seed) when a
        # user names a connected service.
        frozenset({"api_call", "api call", "integration", "integrations",
                   "home assistant", "homeassistant", "miniflux", "gitea",
                   "linkding", "jellyfin"}):
            {"api_call"},
        # Managing EXISTING research in the Library — open/read/find/delete.
        frozenset({"my research", "the research", "research on", "open research",
                   "read research", "find research", "delete research",
@@ -39,10 +39,6 @@ _XML_TOOL_CALL_RE = re.compile(
    r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*?)</(?:[\w]+:)?(?:tool_call|function_call)>",
    re.IGNORECASE,
 )
 # Unterminated wrapper: an opening <tool_call>/<function_call> tag (optionally
 # namespace-prefixed, matched case-insensitively) with no closing tag required.
 # Group 1 greedily captures everything after the tag up to the end of input
 # (\Z), so a match always runs from the opening tag to the end of the text.
 _XML_OPEN_TOOL_CALL_RE = re.compile(
    r"<(?:[\w]+:)?(?:tool_call|function_call)>\s*([\s\S]*)\Z",
    re.IGNORECASE,
 )
 _XML_INVOKE_RE = re.compile(
    r'<invoke\s+name=["\'](\w+)["\']>\s*([\s\S]*?)</invoke>',
    re.IGNORECASE,
@@ -51,21 +47,6 @@ _XML_PARAM_RE = re.compile(
    r'<parameter\s+name=["\'](\w+)["\']>([\s\S]*?)</parameter>',
    re.IGNORECASE,
 )
 # Direct tool tag: <name>body</name>, where the closing tag must repeat the
 # opening tag's name via the \1 backreference (compared case-insensitively).
 # Group 1 is the tag name (letter/underscore first, then word chars or '-'),
 # group 2 is the lazily matched body between the two tags.
 _XML_DIRECT_TOOL_RE = re.compile(
    r"<\s*([A-Za-z_][\w-]*)\s*>([\s\S]*?)</\s*\1\s*>",
    re.IGNORECASE,
 )
 # Pattern 3b: StepFun Step-3.x native tool-call tokens. The tokenizer defines:
 #   <｜tool▁calls▁begin｜> ... <｜tool▁calls▁end｜>
 #   <｜tool▁call▁begin｜>tool_name<｜tool▁sep｜>{...}<｜tool▁call▁end｜>
 # These can leak as text through llama.cpp/Ollama-style endpoints when the
 # engine does not return structured OpenAI tool_calls.
 _STEPFUN_CALL_BEGIN = "<｜tool▁call▁begin｜>"
 _STEPFUN_CALL_SEP = "<｜tool▁sep｜>"
 _STEPFUN_CALL_END = "<｜tool▁call▁end｜>"
 _STEPFUN_CALLS_BEGIN = "<｜tool▁calls▁begin｜>"
 _STEPFUN_CALLS_END = "<｜tool▁calls▁end｜>"
 # Pattern 4: <tool_code> blocks (MiniMax-M2.5 style)
 # {tool => 'tool_name', args => '<param>value</param>'}
@@ -194,9 +175,6 @@ _TOOL_NAME_MAP = {
    "notes": "manage_notes",
    "todo": "manage_notes",
    "todos": "manage_notes",
    "manage_bg_jobs": "manage_bg_jobs",
    "bg_jobs": "manage_bg_jobs",
    "background_jobs": "manage_bg_jobs",
 }
 _MISFENCED_WEB_TOOL_NAMES = {
@@ -465,138 +443,6 @@ def _parse_xml_invoke(inv_match) -> Optional[ToolBlock]:
    return function_call_to_tool_block(tool_name, json.dumps(params))
 def _parse_xml_direct_tool(tool_match) -> Optional[ToolBlock]:
    """Parse direct XML tool tags inside <tool_call>.

    Some local models emit:
      <tool_call><web_search>query</web_search></tool_call>
    instead of the invoke/parameter shape:
      <tool_call><invoke name="web_search"><parameter name="query">query</parameter></invoke></tool_call>
    Keep this as an adapter to the canonical function-call converter so aliases
    and per-tool argument formatting stay in one place.

    Args:
        tool_match: regex match whose group 1 is the tag name and group 2 is
            the tag body.

    Returns:
        The ToolBlock produced by ``function_call_to_tool_block``, or None when
        the tag is a structural wrapper (invoke/parameter/tool_call/
        function_call), names no known tool, or has an empty body.
    """
    tool_name = tool_match.group(1).lower().replace("-", "_")
    # Structural tags are handled by the invoke/parameter parser, never here.
    if tool_name in {"invoke", "parameter", "tool_call", "function_call"}:
        return None
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = tool_match.group(2).strip()
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        decoded = None
    if isinstance(decoded, dict):
        params = decoded
    else:
        # The body is plain text, or JSON that is not an object (a bare number,
        # quoted string, array, true/false/null). A body such as ``2024`` or
        # ``"exact phrase"`` is still the tool's argument, so pass the raw text
        # as the tool's primary parameter instead of discarding it.
        primary_param = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }
        params = {primary_param.get(mapped, "content"): body}
    # Imported lazily, as in the original, to keep the import local to the call.
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
 def _iter_stepfun_tool_calls(text: str):
    """Yield ``(tool_name, body)`` pairs for StepFun native tool-call tokens.

    Scans ``text`` left to right with literal ``str.find`` calls (no regex, so
    no backtracking). Each call is delimited by the begin token, the separator
    token and the end token; the name and body are whitespace-stripped.
    Scanning stops at the first call that is missing its separator or end
    token. Calls whose name is empty or longer than 128 characters are skipped.
    """
    begin_len = len(_STEPFUN_CALL_BEGIN)
    sep_len = len(_STEPFUN_CALL_SEP)
    end_len = len(_STEPFUN_CALL_END)
    cursor = 0
    while True:
        opened = text.find(_STEPFUN_CALL_BEGIN, cursor)
        if opened == -1:
            break
        name_from = opened + begin_len
        sep_at = text.find(_STEPFUN_CALL_SEP, name_from)
        if sep_at == -1:
            break
        body_from = sep_at + sep_len
        closed = text.find(_STEPFUN_CALL_END, body_from)
        if closed == -1:
            break
        name = text[name_from:sep_at].strip()
        # Reject empty or implausibly long names but keep scanning past them.
        if 0 < len(name) <= 128:
            yield name, text[body_from:closed].strip()
        cursor = closed + end_len
 def _strip_stepfun_tool_markup(text: str) -> str:
    """Remove StepFun tool-call token blocks and wrappers using literal scans.

    Every span from a call-begin token through the next call-end token is
    dropped. If a begin token has no matching end token, the remainder of the
    text is kept untouched. Afterwards the outer calls-begin / calls-end
    wrapper tokens are deleted wherever they still occur.
    """
    begin_len = len(_STEPFUN_CALL_BEGIN)
    end_len = len(_STEPFUN_CALL_END)
    kept = []
    cursor = 0
    while True:
        opened = text.find(_STEPFUN_CALL_BEGIN, cursor)
        closed = text.find(_STEPFUN_CALL_END, opened + begin_len) if opened >= 0 else -1
        if closed < 0:
            # No further complete call: keep the tail verbatim and stop.
            kept.append(text[cursor:])
            break
        kept.append(text[cursor:opened])
        cursor = closed + end_len
    stripped = "".join(kept)
    for wrapper in (_STEPFUN_CALLS_BEGIN, _STEPFUN_CALLS_END):
        stripped = stripped.replace(wrapper, "")
    return stripped
 def _strip_bare_invoke_markup(text: str) -> str:
    """Remove bare <invoke ...>...</invoke> blocks without regex backtracking."""
    out = []
    pos = 0
    while True:
        start = text.lower().find("<invoke", pos)
        if start < 0:
            out.append(text[pos:])
            break
        tag_end = text.find(">", start)
        if tag_end < 0:
            out.append(text[pos:])
            break
        close = text.lower().find("</invoke>", tag_end + 1)
        if close < 0:
            out.append(text[pos:])
            break
        out.append(text[pos:start])
        pos = close + len("</invoke>")
    return "".join(out)
 def _parse_stepfun_tool_call(tool_name: str, body: str) -> Optional[ToolBlock]:
    """Parse StepFun native tool-call tokens into an Odysseus ToolBlock.

    Args:
        tool_name: raw tool name taken from between the call-begin and
            separator tokens; lowercased with '-' and '.' mapped to '_'.
        body: raw argument text taken from between the separator and call-end
            tokens; either a JSON object or free text.

    Returns:
        The ToolBlock produced by ``function_call_to_tool_block``, or None when
        the name maps to no known tool or the body is empty.
    """
    tool_name = tool_name.lower().replace("-", "_").replace(".", "_")
    mapped = _TOOL_NAME_MAP.get(tool_name) or (tool_name if tool_name in TOOL_TAGS else None)
    if not mapped:
        return None
    body = (body or "").strip()
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        decoded = None
    if isinstance(decoded, dict):
        params = decoded
    else:
        # The body is plain text, or JSON that is not an object (a bare number,
        # quoted string, array, true/false/null). Such a body is still the
        # tool's argument, so pass the raw text as the tool's primary parameter
        # instead of discarding it.
        primary_param = {
            "web_search": "query",
            "web_fetch": "url",
            "bash": "command",
            "python": "code",
            "read_file": "path",
            "write_file": "path",
        }
        params = {primary_param.get(mapped, "content"): body}
    # Imported lazily, as in the original, to keep the import local to the call.
    from src.tool_schemas import function_call_to_tool_block
    return function_call_to_tool_block(mapped, json.dumps(params))
 def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]:
    """Parse a <tool_code>{tool => 'name', args => '...'}</tool_code> block (MiniMax style)."""
    # Extract tool name
@@ -662,9 +508,8 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
    2. [TOOL_CALL] ... [/TOOL_CALL] blocks (some models)
    3. XML-style <tool_call>/<invoke> blocks
    4. <tool_code> blocks (MiniMax-M2.5 style)
-    5. StepFun Step-3 native <｜tool▁call▁begin｜> tokens
+    5. DeepSeek DSML markup (normalized to <invoke> first)
-    6. DeepSeek DSML markup (normalized to <invoke> first)
+    6. Non-native local model fallback: prose mentioning web_search followed by
    7. Non-native local model fallback: prose mentioning web_search followed by
       bare JSON args, e.g. {"query":"...", "time_filter":"week"}
    `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code
@@ -719,38 +564,12 @@ def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]:
    # Pattern 3: XML-style <tool_call>/<invoke> blocks
    if not blocks:
        for tool_name, body in _iter_stepfun_tool_calls(text):
            block = _parse_stepfun_tool_call(tool_name, body)
            if block:
                blocks.append(block)
        if blocks:
            return blocks
        # Try wrapped: <tool_call><invoke ...>...</invoke></tool_call>
        for m in _XML_TOOL_CALL_RE.finditer(text):
            for inv in _XML_INVOKE_RE.finditer(m.group(1)):
                block = _parse_xml_invoke(inv)
                if block:
                    blocks.append(block)
            if not blocks:
                for direct in _XML_DIRECT_TOOL_RE.finditer(m.group(1)):
                    block = _parse_xml_direct_tool(direct)
                    if block:
                        blocks.append(block)
        # Some local models stream an opening <tool_call> wrapper and a
        # complete inner tool tag, but forget the closing </tool_call>.
        if not blocks:
            for m in _XML_OPEN_TOOL_CALL_RE.finditer(text):
                body = m.group(1)
                for inv in _XML_INVOKE_RE.finditer(body):
                    block = _parse_xml_invoke(inv)
                    if block:
                        blocks.append(block)
                if blocks:
                    break
                for direct in _XML_DIRECT_TOOL_RE.finditer(body):
                    block = _parse_xml_direct_tool(direct)
                    if block:
                        blocks.append(block)
        # Try bare <invoke> without wrapper
        if not blocks:
            for inv in _XML_INVOKE_RE.finditer(text):
@@ -792,9 +611,7 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
    text = _normalize_dsml(text)
    cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text)
    cleaned = _TOOL_CALL_RE.sub('', cleaned)
    cleaned = _strip_stepfun_tool_markup(cleaned)
    cleaned = _XML_TOOL_CALL_RE.sub('', cleaned)
    cleaned = _XML_OPEN_TOOL_CALL_RE.sub('', cleaned)
    cleaned = _TOOL_CODE_RE.sub('', cleaned)
    if not skip_fenced:
        raw_web_json = _parse_raw_web_json_lookup(cleaned)
@@ -802,6 +619,6 @@ def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str:
            _, (start, end) = raw_web_json
            cleaned = cleaned[:start] + cleaned[end:]
    # Strip bare <invoke> blocks not wrapped in <tool_call>
-    cleaned = _strip_bare_invoke_markup(cleaned)
+    cleaned = re.sub(r'<invoke\s+name=["\'].*?</invoke>', '', cleaned, flags=re.DOTALL | re.IGNORECASE)
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
    return cleaned.strip()
@@ -68,12 +68,11 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "web_fetch",
-            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research). Downloads are size-budgeted; a '[partial content: ...]' notice in the result means the body was cut short and you can re-call with full=true for the rest.",
+            "description": "Fetch and read the text content of a specific URL the user names (e.g. 'check example.com', 'what's on this page <url>'). Use when you already have a concrete URL/domain. NOT for open-ended searches (use web_search) or 'research X' jobs (use trigger_research).",
            "parameters": {
                "type": "object",
                "properties": {
-                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"},
+                    "url": {"type": "string", "description": "The URL or domain to fetch (http/https; a bare domain like example.com is fine)"}
                    "full": {"type": "boolean", "description": "Raise the download budget to the hard cap for large pages/files. Use only after a result reported partial content."}
                },
                "required": ["url"]
            }
@@ -1009,7 +1008,7 @@ FUNCTION_TOOL_SCHEMAS = [
        "type": "function",
        "function": {
            "name": "resolve_contact",
-            "description": "Look up a contact by name. Searches CardDAV address book and sent email history. Returns email addresses (when available) or phone numbers. Use when the user says 'message [name]', 'email [name]', or asks for someone's contact details.",
+            "description": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]' or 'email [name]' without an email address.",
            "parameters": {
                "type": "object",
                "properties": {
@@ -1188,21 +1187,6 @@ FUNCTION_TOOL_SCHEMAS = [
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "manage_bg_jobs",
            "description": "Inspect and control detached background `bash` jobs (started with the `#!bg` marker). action='list' shows this chat's jobs with id/status/age/command; action='output' returns a job's captured output so far (use for a still-running job, or to re-read a finished one); action='kill' terminates a runaway job's process tree instead of waiting out its max-runtime. output and kill need job_id from list.",
            "parameters": {
                "type": "object",
                "properties": {
                    "action": {"type": "string", "enum": ["list", "output", "kill"], "description": "list | output | kill (default: list)"},
                    "job_id": {"type": "string", "description": "Background job id (required for output/kill; from action='list')"},
                },
                "required": ["action"]
            }
        }
    },
 ]
@@ -1221,26 +1205,23 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
        logger.error(f"Failed to parse function call arguments for {name}: {arguments}")
        return None
    tool_type = _TOOL_NAME_MAP.get(name, name)
    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    # Some models emit valid JSON that isn't an object (e.g. a bare array
-    # ["ls -la"], string, or number) as function arguments. Most local tools keep
+    # ["ls -la"], string, or number) as the function arguments. Every branch
-    # the legacy empty-object coercion for stream robustness, but email MCP tools
+    # below assumes a dict and calls args.get(...), so a non-dict would raise
-    # must fail closed so a malformed call cannot read the default mailbox.
+    # AttributeError and abort the whole agent stream. Coerce to {} instead.
    if not isinstance(args, dict):
        if tool_type.startswith("mcp__email__") or name in _BUILTIN_EMAIL_TOOLS:
            logger.warning(f"Non-object email function call arguments for {name}: {args!r}; rejecting")
            return None
        logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty")
        args = {}
    tool_type = _TOOL_NAME_MAP.get(name, name)
    # Allow MCP tools through (namespaced as mcp__serverid__toolname)
    if tool_type.startswith("mcp__"):
        content = json.dumps(args) if args else "{}"
        return ToolBlock(tool_type, content)
    # Email tools are implemented as MCP — route them to email
    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    if name in _BUILTIN_EMAIL_TOOLS:
        return ToolBlock(f"mcp__email__{name}", json.dumps(args) if args else "{}")
    if tool_type not in TOOL_TAGS:
@@ -14,7 +14,6 @@ logger = logging.getLogger(__name__)
 NON_ADMIN_BLOCKED_TOOLS = {
    "bash",
    "python",
    "manage_bg_jobs",
    "read_file",
    "write_file",
    "edit_file",
@@ -115,8 +114,6 @@ _PLAN_MODE_KNOWN_MUTATORS = {
    # Shell is never read-only-safe; block it explicitly so it stays out of plan
    # mode even if the schema list fails to load.
    "bash", "python",
    # Controls shell processes (kill); plan mode can't run bash anyway.
    "manage_bg_jobs",
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Alexandre Teixeira	bd0c67b6d3	fix(agent): preserve loop guard stream behavior	2026-06-15 17:17:16 +01:00
Alexandre Teixeira	ff5bcd9864	fix(agent): surface early loop-guard stops	2026-06-15 17:07:15 +01:00