refactor(tools): move session tools to the agent_tools registry (#4454)

Moves create_session, list_sessions, send_to_session and manage_session out of ai_interaction.py into src/agent_tools/session_tools.py (the do_ prefix dropped) and registers them in TOOL_HANDLERS, so dispatch flows through the registry instead of the dispatch_ai_tool elif in tool_execution.py. Same pattern as the model-interaction move. The bodies move verbatim; each fetches the runtime-set session manager via a get_session_manager() shim, and reuses _resolve_model / AI_CHAT_TIMEOUT from ai_interaction. manage_session's internal 'list' alias is repointed from the old do_list_sessions to the moved list_sessions. stream_ai_tool (dead, no callers) and do_pipeline stay put. dispatch_ai_tool loses its four now-unused branches. Tests: test_session_tools_registry covers registration, owner threading, the manage_session->list_sessions delegation, graceful no-manager handling, and registry dispatch. Verified end-to-end against a live SessionManager.
fix(ui): escape model name in model-info popup (DOM-XSS) + two latent sinks (#4605)
2026-06-19 11:15:24 -04:00 · 2026-06-19 11:55:22 +02:00 · 2026-06-19 11:03:44 +02:00 · 2026-06-19 09:40:35 +02:00 · 2026-06-19 00:28:25 -07:00 · 2026-06-19 00:28:22 -07:00
156 changed files with 8169 additions and 1394 deletions
@@ -15,6 +15,10 @@ build/
 # at runtime — never baked into the image. Mirrored in .gitignore.
 secrets.env
 secrets.env.*
 secrets.env~
 .secrets.env.swp
 .secrets.env.swo
 **/#secrets.env#
 !secrets.env.example
 /data/
 /logs/
@@ -19,7 +19,7 @@ jobs:
    name: Python syntax (compileall)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
@@ -32,7 +32,7 @@ jobs:
    name: JS syntax (node --check)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6.4.0
@@ -54,7 +54,7 @@ jobs:
    # ROADMAP "fresh install smoke tests" item; make this required once green.
    continue-on-error: true
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          fetch-depth: 0
          persist-credentials: false
@@ -37,7 +37,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -52,7 +52,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -93,7 +93,7 @@ jobs:
      security-events: write  # upload SARIF to the Security tab
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -36,7 +36,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -55,7 +55,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -45,7 +45,7 @@ jobs:
            arch: arm64
            runner: ubuntu-24.04-arm
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
      - name: Set up Buildx
@@ -86,7 +86,7 @@ jobs:
      contents: read
      packages: write
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
      - name: Read APP_VERSION + short sha
@@ -14,7 +14,7 @@ jobs:
    # Skip bots (Dependabot, release-drafter, etc.)
    if: ${{ github.event.issue.user.type != 'Bot' }}
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          sparse-checkout: .github/scripts
          persist-credentials: false
@@ -23,7 +23,7 @@ jobs:
    # Skip bots: they open PRs programmatically and have their own process.
    if: github.event.pull_request.user.type != 'Bot'
    steps:
-      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+      - uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          ref: ${{ github.base_ref }}
          sparse-checkout: .github/scripts
@@ -35,7 +35,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          # Full history so a secret committed in an earlier commit (and later
          # deleted) is still caught -- deletion does not remove it from Git.
@@ -36,7 +36,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -61,7 +61,7 @@ jobs:
      contents: read
    steps:
      - name: Checkout repository
-        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
        with:
          persist-credentials: false
@@ -37,7 +37,7 @@ Manual development uses Python 3.11+:
 python3 -m venv venv
 source venv/bin/activate
 pip install -r requirements.txt
-python -m uvicorn app:app --host 0.0.0.0 --port 7000
+python -m uvicorn app:app --host 127.0.0.1 --port 7000
 ```
 Windows is not actively tested. Docker on Linux or a Linux/macOS manual install is the safer path for now.
@@ -0,0 +1,45 @@
 # -*- mode: python ; coding: utf-8 -*-
 # Analysis step: launcher.py is the entry script, and `datas` lists the
 # (source, destination) pairs bundled alongside the executable.
 a = Analysis(
    ['launcher.py'],
    pathex=[],
    binaries=[],
    datas=[('static', 'static'), ('scripts', 'scripts'), ('mcp_servers', 'mcp_servers'), ('services/hwfit/data', 'services/hwfit/data'), ('config', 'config'), ('.env.example', '.env.example')],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
 )
 # Archive of the pure-Python modules found by the analysis.
 pyz = PYZ(a.pure)
 # console=False builds a windowed executable named 'Odysseus'.
 # exclude_binaries=True keeps the binaries out of the exe itself; they are
 # passed to COLLECT below instead.
 exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='Odysseus',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=False,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    # NOTE(review): backslash path separator -- presumably this spec is only
    # ever built on Windows; confirm before reusing it elsewhere.
    icon=['static\\icon.ico'],
 )
 # Gather the exe, its binaries and the data files under the name 'Odysseus'.
 coll = COLLECT(
    exe,
    a.binaries,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='Odysseus',
 )
@@ -1,6 +1,7 @@
 # app.py — slim orchestrator
 import mimetypes
 import os
 import sys
 def register_static_mime_types() -> None:
@@ -38,7 +39,7 @@ load_dotenv(encoding="utf-8-sig")
 import asyncio
 import logging
 import secrets
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Dict
 from contextlib import asynccontextmanager
@@ -113,12 +114,13 @@ app = FastAPI(
 )
 # ========= CORS =========
 CORS_ALLOW_METHODS = ["GET", "POST", "PUT", "PATCH", "DELETE"]
 allowed_origins = os.getenv("ALLOWED_ORIGINS", "http://localhost,http://127.0.0.1").split(",")
 app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
-    allow_methods=["GET", "POST", "PUT", "DELETE"],
+    allow_methods=CORS_ALLOW_METHODS,
    allow_headers=[
        "Accept",
        "Authorization",
@@ -316,7 +318,7 @@ if AUTH_ENABLED:
            # (no admin cookie available in that context). Restricted to
            # loopback clients + matching token to keep it locked down.
            try:
-                from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT
+                from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT, INTERNAL_TOOL_USER
                _hdr = request.headers.get(INTERNAL_TOOL_HEADER)
                if _hdr and secrets.compare_digest(_hdr, _ITT) and _is_trusted_loopback(request):
                    # Impersonation: when the agent's loopback call sets
@@ -328,7 +330,7 @@ if AUTH_ENABLED:
                    if _impersonate and _impersonate in getattr(_auth_mgr, "users", {}):
                        request.state.current_user = _impersonate
                    else:
-                        request.state.current_user = "internal-tool"
+                        request.state.current_user = INTERNAL_TOOL_USER
                    request.state.api_token = False
                    return await call_next(request)
            except Exception as _e:
@@ -437,7 +439,7 @@ class _RevalidatingStatic(StaticFiles):
        return resp
-app.mount("/static", _RevalidatingStatic(directory="static"), name="static")
+app.mount("/static", _RevalidatingStatic(directory=STATIC_DIR), name="static")
 # ========= GENERATED IMAGES =========
@app.get("/api/generated-image/{filename}")
@@ -527,6 +529,7 @@ memory_vector     = components.get("memory_vector")
 upload_handler    = components["upload_handler"]
 app.state.upload_handler = upload_handler
 personal_docs_mgr = components["personal_docs_manager"]
 app.state.personal_docs_manager = personal_docs_mgr
 api_key_manager   = components["api_key_manager"]
 preset_manager    = components["preset_manager"]
 chat_processor    = components["chat_processor"]
@@ -860,7 +863,7 @@ async def get_version():
@app.get("/api/health")
 async def health_check() -> Dict[str, str]:
-    return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
+    return {"status": "healthy", "timestamp": datetime.now(timezone.utc).isoformat()}
@app.get("/api/ready")
 async def readiness_check() -> JSONResponse:
@@ -1170,3 +1173,12 @@ async def _shutdown_event():
    except Exception as e:
        logger.warning(f"MCP shutdown error: {e}")
    logger.info("Application shutdown complete")
 if __name__ == "__main__":
    # Direct-run entry point: serve `app` with uvicorn on APP_BIND / APP_PORT,
    # falling back to 127.0.0.1:7000 when those variables are unset.
    import uvicorn
    bind_host = os.getenv("APP_BIND", "127.0.0.1")
    # int() raises ValueError if APP_PORT is set to a non-numeric value.
    bind_port = int(os.getenv("APP_PORT", "7000"))
    uvicorn.run(app, host=bind_host, port=bind_port, log_level="info")
@@ -0,0 +1,72 @@
 #Requires -Version 5.1
 <#
  Build a portable Windows distribution for Odysseus.
  Output layout:
    dist\Odysseus\Odysseus.exe
    dist\Odysseus\static\...
    dist\Odysseus\scripts\...
    dist\Odysseus\mcp_servers\...
    dist\Odysseus\services\hwfit\data\...
  The app then keeps using its normal filesystem layout when frozen.
  Usage:
    powershell -ExecutionPolicy Bypass -File .\build-windows-portable.ps1
 #>
 $ErrorActionPreference = "Stop"
 Set-Location -Path $PSScriptRoot
 function Write-Step($msg) {
    # Print a blank spacer line, then the step title in cyan.
    Write-Host ""
    Write-Host "==> $msg" -ForegroundColor Cyan
 }
 function Fail($msg) {
    # Print a blank spacer line and the error in red, then abort the build
    # with exit code 1.
    Write-Host ""
    Write-Host "ERROR: $msg" -ForegroundColor Red
    exit 1
 }
 Write-Step "Checking for Python"
 # Resolve the interpreter used for the build: prefer the project-local
 # virtual environment, otherwise the first of `py` / `python` found on PATH.
 $pyExe = $null
 if (Test-Path ".\.venv\Scripts\python.exe") {
    $pyExe = (Resolve-Path ".\.venv\Scripts\python.exe").Path
 } else {
    foreach ($c in @("py", "python")) {
        $cmd = Get-Command $c -ErrorAction SilentlyContinue
        if ($cmd) { $pyExe = $cmd.Source; break }
    }
    # A python.exe under a WindowsApps directory is presumably the Microsoft
    # Store alias, so try to switch to the `py` launcher instead.
    # NOTE(review): `py` is already tried first in the loop above, so when this
    # branch is reached Get-Command py has most likely already returned nothing
    # and $pyExe keeps the WindowsApps path -- confirm the intended fallback.
    if ($pyExe -like "*WindowsApps*python.exe") {
        $pyCmd = Get-Command py -ErrorAction SilentlyContinue
        if ($pyCmd) {
            $pyExe = $pyCmd.Source
        }
    }
 }
 # Abort early with a clear message when no interpreter was found at all.
 if (-not $pyExe) {
    Fail "Python not found on PATH. Install Python 3.11+ first."
 }
 Write-Host ("Using Python: " + $pyExe)
 Write-Step "Installing build dependencies"
 & $pyExe -m pip install --upgrade pip --quiet
 & $pyExe -m pip install -r requirements.txt pyinstaller pystray Pillow
 if ($LASTEXITCODE -ne 0) { Fail "Dependency install failed." }
 Write-Step "Building portable exe bundle"
 Remove-Item -Recurse -Force build, dist -ErrorAction SilentlyContinue
 $dataArgs = @(
    "--add-data", "static;static",
    "--add-data", "scripts;scripts",
    "--add-data", "mcp_servers;mcp_servers",
    "--add-data", "services/hwfit/data;services/hwfit/data",
    "--add-data", "config;config",
    "--add-data", ".env.example;.env.example"
 )
 & $pyExe -m PyInstaller --noconfirm --clean --onedir --noconsole --icon=static/icon.ico --name Odysseus @dataArgs launcher.py
 if ($LASTEXITCODE -ne 0) { Fail "PyInstaller build failed." }
 Write-Host ""
 Write-Host "Build complete." -ForegroundColor Green
 Write-Host "Portable app folder: $PSScriptRoot\dist\Odysseus" -ForegroundColor Green
 Write-Host "Distribute the whole folder (or zip it) so static assets and scripts stay with the exe." -ForegroundColor Green
@@ -20,6 +20,7 @@ logger = logging.getLogger(__name__)
 from core.atomic_io import atomic_write_json as _atomic_write_json  # noqa: E402
 from core.middleware import INTERNAL_TOOL_USER  # noqa: E402
 DEFAULT_PRIVILEGES = {
    "can_use_agent": True,
@@ -47,7 +48,7 @@ ADMIN_PRIVILEGES["allowed_models_restricted"] = False
 # backwards for this sentinel.
 ADMIN_PRIVILEGES["block_all_models"] = False
-from src.constants import AUTH_FILE
+from src.constants import AUTH_FILE, PASSWORD_MIN_LENGTH
 DEFAULT_AUTH_PATH = AUTH_FILE
 TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days
@@ -65,7 +66,7 @@ TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days
 # of those names would be denied an assistant and inconsistently owner-scoped.
 # Refuse to create or rename into any of them so the sentinels can't be
 # impersonated. (Keep this in sync with that synthetic-owner set.)
-RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"})
+RESERVED_USERNAMES = frozenset({INTERNAL_TOOL_USER, "api", "demo", "system"})
 def normalize_known_username(users: Dict[str, Any], username: str | None) -> Optional[str]:
@@ -243,6 +244,15 @@ class AuthManager:
    def is_configured(self) -> bool:
        return len(self.users) > 0
    def policy(self) -> dict:
        """Expose the auth settings the frontend is allowed to see.

        Returns:
            A dict with the minimum password length, the reserved usernames
            in sorted order, the signup flag, and the session lifetime in
            whole days (TOKEN_TTL is expressed in seconds).
        """
        seconds_per_day = 86400
        public_policy = dict(
            password_min_length=PASSWORD_MIN_LENGTH,
            reserved_usernames=sorted(RESERVED_USERNAMES),
            signup_enabled=self.signup_enabled,
            session_days=TOKEN_TTL // seconds_per_day,
        )
        return public_policy
    # ------------------------------------------------------------------
    # Account management
    # ------------------------------------------------------------------
@@ -573,11 +583,15 @@ class AuthManager:
            return None
        return self.create_session_trusted(username)
-    def create_session_trusted(self, username: str) -> str:
+    def create_session_trusted(self, username: str) -> Optional[str]:
        """Issue a session token for an already-verified user.
        Call only after verify_password (and TOTP if enabled) have passed."""
        username = username.strip().lower()
        token = secrets.token_hex(32)
        with self._config_lock:
            if username not in self.users:
                logger.warning("Refused to issue session for missing user '%s'", username)
                return None
            with self._sessions_lock:
                self._sessions[token] = {
                    "username": username,
@@ -15,6 +15,8 @@ from starlette.responses import Response
 # same value from this module. Never persisted or exposed externally.
 INTERNAL_TOOL_TOKEN = os.environ.get("ODYSSEUS_INTERNAL_TOKEN") or secrets.token_hex(32)
 INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token"
 # Pseudo-username on in-process tool-loopback requests; require_admin trusts it and it is reserved.
 INTERNAL_TOOL_USER = "internal-tool"
 def is_cors_preflight(method: str, headers) -> bool:
@@ -39,7 +41,7 @@ def require_admin(request: Request):
        hdr = request.headers.get(INTERNAL_TOOL_HEADER)
        if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN):
            return
-        if getattr(request.state, "current_user", None) == "internal-tool":
+        if getattr(request.state, "current_user", None) == INTERNAL_TOOL_USER:
            return
    except Exception:
        pass
@@ -65,10 +67,9 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
        response = await call_next(request)
        path = request.url.path
-        # Tool render endpoints are served inside iframes — allow framing by self
+        # Tool render endpoints
        is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
-        # PDF previews are embedded by the in-app document library. Keep the
+        # Document library PDF preview endpoint. Keep the framing
        # exception route-scoped so normal app pages remain unframeable.
        is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
        # Visual report pages are self-contained HTML — need inline scripts + external images
        is_report = path.startswith("/api/research/report/")
@@ -95,9 +96,7 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
                "frame-ancestors 'none'"
            )
        elif is_tool_render:
-            # Tool iframe content: skip all framing headers — the iframe's
+            # Skip framing headers for tools: the iframe's
            # sandbox="allow-scripts" attribute provides isolation.
            # Don't overwrite the route's own restrictive CSP either.
            pass
        elif is_document_pdf_preview:
            response.headers["X-Frame-Options"] = "SAMEORIGIN"
@@ -60,6 +60,13 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -59,6 +59,13 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -48,6 +48,13 @@ services:
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
      - ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
      - ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
      - ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
      - ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -13,6 +13,8 @@ set -e
 PUID="${PUID:-1000}"
 PGID="${PGID:-1000}"
 GOSU_BIN="$(command -v gosu)"
 PYTHON_BIN="$(command -v python)"
 # Reuse an existing matching group/user if the host's UID/GID already
 # corresponds to one in /etc/passwd (e.g. when the image is rebuilt
@@ -24,26 +26,57 @@ if ! getent passwd "$PUID" >/dev/null 2>&1; then
    useradd -u "$PUID" -g "$PGID" -M -s /bin/sh -d /app odysseus
 fi
-# Repair ownership on every writable path the app touches at runtime.
+mount_root_for() {
-#
+    awk -v target="$1" '$5 == target { print $4; exit }' /proc/self/mountinfo 2>/dev/null || true
-# Bind-mounted dirs (/app/data, /app/logs) are the obvious ones, but
+}
-# the app ALSO writes inside the image's own source tree at runtime:
+
-#   - services/cache/{search,content}/*  (search cache LRU)
+is_broad_mount_root() {
-#   - services/search_analytics.json
+    case "$1" in
-#   - services/search_engine_error.log
+        /|/home|/srv|/var|/usr|/opt|/tmp|/mnt|/media)
-#   - services/tts cache, etc.
+            return 0
-# These dirs were created as root during `docker build`, so dropping
+            ;;
-# to PUID:PGID would otherwise crash on the first import that tries
+    esac
-# to mkdir them. Chown the whole /app tree — fast (<1s on this size)
+    return 1
-# and idempotent via the `-not -uid` filter so we only touch files
+}
-# that need fixing.
+
-for dir in /app /app/data /app/logs; do
+repair_tree_ownership() {
    dir="$1"
    if [ -d "$dir" ]; then
-        # `find ... -not -uid` keeps this O(touched-files), not
+        find "$dir" -xdev -not -uid "$PUID" -print0 2>/dev/null \
        # O(everything), so terabyte-sized maildirs don't slow startup.
        find "$dir" -not -uid "$PUID" -print0 2>/dev/null \
            | xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
    fi
 }
 # Chown everything under /app that is not already owned by $PUID, staying on
 # one filesystem (-xdev) and pruning the data, logs, .ssh, .cache and .local
 # subtrees so they are not walked here.
 repair_app_tree_ownership() {
    if [ -d /app ]; then
        # Best effort: find/chown errors are silenced and `|| true` keeps a
        # failure from aborting the script.
        find /app -xdev \
            \( -path /app/data -o -path /app/logs -o -path /app/.ssh -o -path /app/.cache -o -path /app/.local \) -prune \
            -o -not -uid "$PUID" -print0 2>/dev/null \
            | xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
    fi
 }
 # Repair ownership of one mount point ($1). When the mount maps to a broad
 # host path, only the directory itself is chowned; otherwise the whole tree
 # is repaired.
 repair_bind_mount_ownership() {
    dir="$1"
    # Nothing to do when the directory does not exist.
    if [ ! -d "$dir" ]; then
        return
    fi
    mount_root="$(mount_root_for "$dir")"
    if is_broad_mount_root "$mount_root"; then
        # Avoid recursing into a broad host directory: warn on stderr and fix
        # only the top-level directory (best effort).
        echo "Skipping recursive ownership repair for $dir because it maps to broad host path $mount_root" >&2
        chown "$PUID:$PGID" "$dir" 2>/dev/null || true
        return
    fi
    repair_tree_ownership "$dir"
 }
 # Repair image-owned writable paths without walking into bind-mounted host
 # trees, then repair the app-owned mount roots separately.
 repair_app_tree_ownership
 for dir in /app/data /app/logs /app/.ssh /app/.cache/huggingface /app/.local; do
    repair_bind_mount_ownership "$dir"
 done
 # Cookbook installs vllm/etc. via `pip install --user`, which pulls
@@ -83,9 +116,9 @@ export PATH="/app/.local/bin:$PATH"
 # Run first-time setup as the app user so data/ files get the right ownership.
 # setup.py is idempotent — skips auth.json / .env if they already exist.
 # || true so a setup failure never prevents the container from starting.
-gosu "$PUID:$PGID" python /app/setup.py || true
+"$GOSU_BIN" "$PUID:$PGID" "$PYTHON_BIN" /app/setup.py || true
 # Drop root and run the actual app. `gosu` is preferred over `su` /
 # `sudo` because it cleans up the process tree (no extra shell layer)
 # so signals (SIGTERM from `docker stop`) reach uvicorn directly.
-exec gosu "$PUID:$PGID" "$@"
+exec "$GOSU_BIN" "$PUID:$PGID" "$@"
@@ -105,6 +105,14 @@ if (-not $pyExe) {
    }
 }
 if ($pyExe -like "*WindowsApps*python.exe") {
    $pyCmd = Get-Command py -ErrorAction SilentlyContinue
    if ($pyCmd) {
        $pyExe = $pyCmd.Source
        $pyArgs = @("-3.11")
    }
 }
 if (-not $pyExe) {
    Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script."
 }
@@ -0,0 +1,142 @@
 # launcher.py
 """Dedicated entrypoint for the standalone Windows portable launcher.
 Handles:
 - Immediate GUI splash screen creation using tkinter.
 - Suppressing console stream crashes in windowed GUI mode via NullWriter.
 - Spawning system tray icon via pystray and Pillow (lazy-loaded).
 - Auto-opening default browser pointing to the running backend.
 - Launching the FastAPI server (importing and running app.py).
 """
 import os
 import sys
 import threading
 import time
 import webbrowser
 # Define a dummy NullWriter to suppress standard stream crashes (isatty etc.) in GUI mode
 class NullWriter:
    """Write-only text sink used in place of a missing sys.stdout / sys.stderr.

    Everything written is discarded. The methods follow the text-stream
    contract so callers that use the return value of write() or call
    writelines() keep working.
    """

    def write(self, text):
        """Discard `text` and report it as fully written.

        Returns:
            The number of characters "written", as io.TextIOBase.write does,
            or 0 when `text` has no length (best effort, never raises).
        """
        try:
            return len(text)
        except TypeError:
            return 0

    def writelines(self, lines):
        """Discard `lines`; present so stream consumers that call it do not fail."""
        return None

    def flush(self):
        """No-op: nothing is buffered."""
        pass

    def isatty(self):
        """This sink is never attached to a terminal."""
        return False
 if sys.stdout is None:
    sys.stdout = NullWriter()
 if sys.stderr is None:
    sys.stderr = NullWriter()
 splash_root = None
 # If running from a frozen PyInstaller bundle, launch the splash screen IMMEDIATELY
 if getattr(sys, 'frozen', False):
    # tkinter is imported only in the frozen build, where the splash is shown.
    import tkinter as tk
    def show_splash_instantly():
        """Build the splash window and run its Tk main loop.

        Stores the window in the module-level `splash_root` so open_browser()
        can close it later. Any exception is swallowed, so a splash failure
        never stops the launcher.
        """
        global splash_root
        try:
            splash_root = tk.Tk()
            splash_root.title("Odysseus")
            # Drop the window-manager decorations (title bar, borders).
            splash_root.overrideredirect(True)
            splash_root.configure(bg="#1a1c23")
            # Accented borders
            splash_root.config(highlightbackground="#e06c75", highlightcolor="#e06c75", highlightthickness=1)
            # Centre a fixed 360x160 window on the screen.
            w, h = 360, 160
            ws = splash_root.winfo_screenwidth()
            hs = splash_root.winfo_screenheight()
            x = (ws - w) // 2
            y = (hs - h) // 2
            splash_root.geometry(f"{w}x{h}+{x}+{y}")
            tk.Label(splash_root, text="⛵ Odysseus", font=("Segoe UI", 22, "bold"), bg="#1a1c23", fg="#e06c75").pack(pady=(22, 2))
            tk.Label(splash_root, text="Launching background services...", font=("Segoe UI", 10), bg="#1a1c23", fg="#d1d4e0").pack(pady=2)
            tk.Label(splash_root, text="Please wait, this will take a few seconds.", font=("Segoe UI", 8, "italic"), bg="#1a1c23", fg="#5c6370").pack(pady=(12, 0))
            splash_root.attributes("-topmost", True)
            # Blocks this thread until the window is destroyed.
            splash_root.mainloop()
        except Exception:
            pass
    # Launch the GUI splash screen immediately on a background thread
    threading.Thread(target=show_splash_instantly, daemon=True).start()
 def create_tray_image():
    """Draw the 64x64 RGBA tray icon: a sailboat in the #e06c75 accent colour.

    Pillow is imported lazily so it is only loaded when the tray is built.
    """
    from PIL import Image, ImageDraw

    solid_accent = (224, 108, 117, 255)
    faded_accent = (224, 108, 117, 150)

    canvas = Image.new('RGBA', (64, 64), (0, 0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Painted in order: first sail, second (translucent) sail, then the hull.
    shapes = (
        ([(32, 10), (32, 45), (12, 45)], solid_accent),
        ([(32, 18), (32, 45), (48, 45)], faded_accent),
        ([(8, 48), (56, 48), (44, 56), (20, 56)], solid_accent),
    )
    for outline, colour in shapes:
        pen.polygon(outline, fill=colour)
    return canvas
 def on_open_browser(icon, item, url):
    """Tray-menu handler: open `url` in the default browser.

    `icon` and `item` are the arguments supplied by the menu callback and
    are not used.
    """
    webbrowser.open(url)
 def on_exit(icon, item):
    """Tray-menu handler: stop the tray icon, then end the whole process."""
    icon.stop()
    # os._exit ends the process immediately, without the normal interpreter
    # shutdown, so the server thread running uvicorn goes down with it.
    os._exit(0)
 def setup_system_tray(url):
    """Create the tray icon with 'Open Odysseus' and 'Exit' entries and run it.

    pystray is imported lazily. Any failure (missing package, no tray
    support) is swallowed so the server keeps running without a tray icon.
    """
    try:
        import pystray

        def _open_in_browser(icon, item):
            on_open_browser(icon, item, url)

        tray_picture = create_tray_image()
        open_entry = pystray.MenuItem('Open Odysseus', _open_in_browser, default=True)
        exit_entry = pystray.MenuItem('Exit', on_exit)
        pystray.Icon(
            "Odysseus",
            tray_picture,
            "Odysseus",
            (open_entry, exit_entry),
        ).run()
    except Exception:
        pass
 def open_browser(url):
    """Wait for the server to warm up, close the splash window, open `url`."""
    global splash_root

    # Fixed grace period so uvicorn and the app lifecycle can finish starting.
    time.sleep(3.5)

    # Closing the splash is best effort; a failure must not block the browser.
    try:
        if splash_root:
            splash_root.after(0, splash_root.destroy)
    except Exception:
        pass

    webbrowser.open(url)
 def build_browser_url(bind_host, bind_port):
    """Return the URL a local browser should open for a server bound to bind_host.

    A wildcard bind address is a listen address only and cannot be navigated
    to, so "0.0.0.0" and "" map to 127.0.0.1 and "::" maps to [::1]. Other
    IPv6 literals are wrapped in brackets, as URLs require. Any other host
    is used unchanged.
    """
    host = (bind_host or "").strip()
    if host in ("", "0.0.0.0"):
        host = "127.0.0.1"
    elif host in ("::", "[::]"):
        host = "[::1]"
    elif ":" in host and not host.startswith("["):
        host = f"[{host}]"
    return f"http://{host}:{bind_port}"
 if __name__ == "__main__":
    import uvicorn
    # Import the FastAPI app from app.py
    from app import app
    bind_host = os.getenv("APP_BIND", "127.0.0.1")
    bind_port = int(os.getenv("APP_PORT", "7000"))
    # The server binds to bind_host as given; the browser gets a reachable URL.
    url = build_browser_url(bind_host, bind_port)
    if getattr(sys, 'frozen', False):
        # Start browser manager thread
        threading.Thread(target=open_browser, args=(url,), daemon=True).start()
        # Start system tray manager thread
        threading.Thread(target=setup_system_tray, args=(url,), daemon=True).start()
    uvicorn.run(app, host=bind_host, port=bind_port, log_level="info")
@@ -23,6 +23,7 @@ import os.path
 from pathlib import Path
 from datetime import datetime, timedelta
 import uuid
 from contextvars import ContextVar
 from mcp.server import Server
 from mcp.server.stdio import stdio_server
@@ -55,6 +56,8 @@ def _uid_fetch_rows(data) -> list:
 # flat keys when no DB row matches (legacy single-account behaviour).
 _ACCOUNT_CACHE: dict = {}  # key = normalized account selector -> config dict
 _MCP_OWNER_ARG = "_odysseus_owner"
 _CURRENT_OWNER: ContextVar[str | None] = ContextVar("email_mcp_owner", default=None)
 def _clean_header_value(value) -> str:
@@ -68,6 +71,45 @@ def _db_path() -> Path:
    return Path(APP_DB)
 def _current_owner() -> str:
    """Return the `_CURRENT_OWNER` context value as a stripped string.

    Yields "" when the context variable is unset or holds a falsy value.
    """
    active = _CURRENT_OWNER.get()
    if not active:
        return ""
    return str(active).strip()
 def _account_visible_to_owner(row: dict, owner: str) -> bool:
    row_owner = str(row.get("owner") or "").strip()
    if row_owner == owner:
        return True
    if row_owner:
        return False
    # Legacy ownerless accounts are only visible to a scoped caller when the
    # mailbox itself matches the owner, mirroring the HTTP email route fallback.
    owner_l = owner.lower()
    return owner_l in {
        str(row.get("imap_user") or "").strip().lower(),
        str(row.get("from_address") or "").strip().lower(),
    }
 def _filter_accounts_for_owner(rows: list[dict]) -> list[dict]:
    """Restrict `rows` to the accounts the current owner may see.

    With an owner set, only rows passing `_account_visible_to_owner` are
    kept. Without one, `rows` is returned unchanged unless it spans more
    than one distinct non-empty owner, in which case nothing is returned.
    """
    caller = _current_owner()
    if caller:
        visible = []
        for account in rows:
            if _account_visible_to_owner(account, caller):
                visible.append(account)
        return visible

    distinct_owners = set()
    for account in rows:
        name = str(account.get("owner") or "").strip()
        if name:
            distinct_owners.add(name)
    return [] if len(distinct_owners) > 1 else rows
 def _mcp_owner_required(rows: list[dict] | None = None) -> bool:
    """Report whether a call without an owner is ambiguous.

    Returns False as soon as an owner is set. Otherwise returns True when the
    account rows (read from the database if `rows` is None) carry more than
    one distinct non-empty owner.
    """
    if _current_owner():
        return False
    if rows is None:
        rows = _read_accounts_from_db()

    seen = set()
    for account in rows:
        name = str(account.get("owner") or "").strip()
        if name:
            seen.add(name)
    return len(seen) > 1
 def _load_email_writing_style() -> str:
    """Return the existing Settings > Email > Writing Style value."""
    try:
@@ -121,9 +163,8 @@ def _default_document_owner() -> str | None:
        return None
-def _list_accounts_raw() -> list:
+def _read_accounts_from_db() -> list:
-    """Return list of dicts from the email_accounts table. Empty list if table
+    """Return all enabled email account rows. Empty list if missing. Never raises."""
    missing or empty. Never raises."""
    path = _db_path()
    if not path.exists():
        return []
@@ -131,9 +172,10 @@ def _list_accounts_raw() -> list:
        conn = sqlite3.connect(str(path))
        conn.row_factory = sqlite3.Row
        columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()}
        owner_select = "owner" if "owner" in columns else "NULL AS owner"
        smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security"
        rows = conn.execute(f"""
-            SELECT id, name, is_default, enabled,
+            SELECT id, {owner_select}, name, is_default, enabled,
                   imap_host, imap_port, imap_user, imap_password, imap_starttls,
                   smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address
            FROM email_accounts WHERE enabled = 1
@@ -147,11 +189,15 @@ def _list_accounts_raw() -> list:
        return []
-def _resolve_account(selector: str | None) -> dict | None:
+def _list_accounts_raw() -> list:
    """Return owner-visible email account rows for the active MCP call."""
    return _filter_accounts_for_owner(_read_accounts_from_db())
 def _resolve_account_from_rows(rows: list[dict], selector: str | None) -> dict | None:
    """Given a selector (None = default, or a name/user/id string), return the
    matching row or None. Matching is case-insensitive substring on name +
    imap_user + from_address, plus exact id match."""
    rows = _list_accounts_raw()
    if not rows:
        return None
    if not selector:
@@ -186,6 +232,10 @@ def _resolve_account(selector: str | None) -> dict | None:
    return None
 def _resolve_account(selector: str | None) -> dict | None:
    """Resolve ``selector`` against the rows returned by ``_list_accounts_raw()``.

    Delegates the matching itself to ``_resolve_account_from_rows``.
    """
    visible_rows = _list_accounts_raw()
    return _resolve_account_from_rows(visible_rows, selector)
 def _load_config(account: str | None = None) -> dict:
    """Return the full config dict for the requested account (or default).
@@ -194,7 +244,7 @@ def _load_config(account: str | None = None) -> dict:
      2. env vars + settings.json flat keys (legacy)
      3. hardcoded fallbacks (localhost:31143 etc.)
    """
-    cache_key = (account or "").strip().lower() or "__default__"
+    cache_key = (_current_owner(), (account or "").strip().lower() or "__default__")
    if cache_key in _ACCOUNT_CACHE:
        return _ACCOUNT_CACHE[cache_key]
@@ -223,8 +273,11 @@ def _load_config(account: str | None = None) -> dict:
        "account_name": None,
    }
-    rows = _list_accounts_raw()
+    raw_rows = _read_accounts_from_db()
-    row = _resolve_account(account)
+    rows = _filter_accounts_for_owner(raw_rows)
    row = _resolve_account_from_rows(rows, account)
    if _current_owner() and raw_rows and not rows:
        raise ValueError("No email account is configured for the authenticated owner")
    if account and rows and not row:
        available = ", ".join(
            f"{r.get('name') or r.get('imap_user')} <{r.get('imap_user') or r.get('from_address') or '?'}>"
@@ -953,7 +1006,7 @@ def _stash_agent_draft(*, to, subject, body, in_reply_to=None, references=None,
            now,
            account or None,
            "agent_draft",
-            "",
+            _current_owner(),
        ))
        conn.commit()
        conn.close()
@@ -1139,7 +1192,7 @@ def _create_email_draft_document(
    doc_id = str(uuid.uuid4())
    ver_id = str(uuid.uuid4())
    doc_title = (title or subject or "Email draft").strip() or "Email draft"
-    doc_owner = _default_document_owner()
+    doc_owner = _current_owner() or _default_document_owner()
    db = SessionLocal()
    try:
@@ -1925,10 +1978,22 @@ async def list_tools() -> list[Tool]:
@server.call_tool()
 async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    arguments = dict(arguments) if isinstance(arguments, dict) else {}
    owner = str(arguments.pop(_MCP_OWNER_ARG, "") or "").strip()
    owner_token = _CURRENT_OWNER.set(owner or None)
    try:
        all_db_accounts = _read_accounts_from_db()
        if _mcp_owner_required(all_db_accounts):
            return [TextContent(
                type="text",
                text="Error: email MCP requires an authenticated owner when multiple email account owners are configured.",
            )]
        if name == "list_email_accounts":
-            rows = _list_accounts_raw()
+            rows = _filter_accounts_for_owner(all_db_accounts)
            if not rows:
                if all_db_accounts and owner:
                    return [TextContent(type="text", text="No email accounts configured for this owner.")]
                return [TextContent(type="text", text="No email accounts configured. Legacy single-account mode active.")]
            lines = [f"Found {len(rows)} email account(s):\n"]
            for r in rows:
@@ -2108,6 +2173,16 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                bcc=arguments.get("bcc"),
                account=acct,
            )
            if "error" in result:
                return [TextContent(type="text", text=f"Error: {result['error']}")]
            if result.get("pending"):
                return [TextContent(
                    type="text",
                    text=(
                        f"Draft staged for approval (pending id: {result.get('pending_id')}). "
                        "Nothing has been sent yet. Review and approve it in Odysseus before delivery."
                    ),
                )]
            acct_note = f" (from {result['account']})" if result.get("account") else ""
            return [TextContent(type="text", text=f"Sent email to {result['to']} with subject '{result['subject']}'{acct_note}.")]
@@ -2283,6 +2358,8 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
    except Exception as e:
        return [TextContent(type="text", text=f"Error: {e}")]
    finally:
        _CURRENT_OWNER.reset(owner_token)
 # ── Main ──
@@ -4,93 +4,19 @@
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
        "@anthropic-ai/sdk": "^0.104.1"
      },
      "devDependencies": {
-        "@antithesishq/bombadil": "^0.5.0"
+        "@antithesishq/bombadil": "^0.6.1"
      }
    },
    "node_modules/@anthropic-ai/sdk": {
      "version": "0.104.1",
      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
      "integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
      "license": "MIT",
      "dependencies": {
        "json-schema-to-ts": "^3.1.1",
        "standardwebhooks": "^1.0.0"
      },
      "bin": {
        "anthropic-ai-sdk": "bin/cli"
      },
      "peerDependencies": {
        "zod": "^3.25.0 || ^4.0.0"
      },
      "peerDependenciesMeta": {
        "zod": {
          "optional": true
        }
      }
    },
    "node_modules/@antithesishq/bombadil": {
-      "version": "0.5.0",
+      "version": "0.6.1",
-      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
+      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.6.1.tgz",
-      "integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
+      "integrity": "sha512-d1iufG3MI7gSMSiSmMeNdcMW+qR0yQXL2zdkVynC3n3DYgFJYlYXKUQzygmqU12m4RWlR5iOdQU1hsx5UT6+IA==",
      "dev": true,
      "license": "MIT",
      "bin": {
        "bombadil": "bin/bombadil.js"
      }
    },
    "node_modules/@babel/runtime": {
      "version": "7.29.7",
      "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.29.7.tgz",
      "integrity": "sha512-Nq8OhGWiZIZGV6hLHoyAKLLcJihP/xFeBMGJoUrxTX2psI8dCifzLhZISFb+VWS3wFMRDmCGw5R+dOySCqPLhw==",
      "license": "MIT",
      "engines": {
        "node": ">=6.9.0"
      }
    },
    "node_modules/@stablelib/base64": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/@stablelib/base64/-/base64-1.0.1.tgz",
      "integrity": "sha512-1bnPQqSxSuc3Ii6MhBysoWCg58j97aUjuCSZrGSmDxNqtytIi0k8utUenAwTZN4V5mXXYGsVUI9zeBqy+jBOSQ==",
      "license": "MIT"
    },
    "node_modules/fast-sha256": {
      "version": "1.3.0",
      "resolved": "https://registry.npmjs.org/fast-sha256/-/fast-sha256-1.3.0.tgz",
      "integrity": "sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ==",
      "license": "Unlicense"
    },
    "node_modules/json-schema-to-ts": {
      "version": "3.1.1",
      "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
      "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
      "license": "MIT",
      "dependencies": {
        "@babel/runtime": "^7.18.3",
        "ts-algebra": "^2.0.0"
      },
      "engines": {
        "node": ">=16"
      }
    },
    "node_modules/standardwebhooks": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/standardwebhooks/-/standardwebhooks-1.0.0.tgz",
      "integrity": "sha512-BbHGOQK9olHPMvQNHWul6MYlrRTAOKn03rOe4A8O3CLWhNf4YHBqq2HJKKC+sfqpxiBY52pNeesD6jIiLDz8jg==",
      "license": "MIT",
      "dependencies": {
        "@stablelib/base64": "^1.0.0",
        "fast-sha256": "^1.3.0"
      }
    },
    "node_modules/ts-algebra": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
      "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
      "license": "MIT"
    }
  }
 }
@@ -4,9 +4,6 @@
    "url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
  },
  "devDependencies": {
-    "@antithesishq/bombadil": "^0.5.0"
+    "@antithesishq/bombadil": "^0.6.1"
  },
  "dependencies": {
    "@anthropic-ai/sdk": "^0.104.1"
  }
 }
@@ -16,6 +16,7 @@ from pydantic import BaseModel
 from core.database import SessionLocal, CrewMember, ScheduledTask
 from src.auth_helpers import get_current_user
 from core.auth import RESERVED_USERNAMES
 from src.task_scheduler import compute_next_run
@@ -89,11 +90,11 @@ def setup_assistant_routes(task_scheduler) -> APIRouter:
    # check-in tasks seeded. Hitting any /assistant route under one of these
    # used to seed a full CrewMember + Morning/Midday/Evening tasks under that
    # owner, which then double-fired alongside the real user's check-ins.
-    _SYNTHETIC_OWNERS = frozenset({"internal-tool", "api", "demo", "system", ""})
+    # RESERVED_USERNAMES covers the same set; the `not owner` guard handles "".
    async def _get_or_create(owner: str) -> CrewMember:
        """Return the per-owner assistant CrewMember, creating it on demand."""
-        if not owner or owner in _SYNTHETIC_OWNERS:
+        if not owner or owner in RESERVED_USERNAMES:
            raise HTTPException(status_code=400, detail=f"Cannot seed assistant for {owner!r}")
        db = SessionLocal()
        try:
@@ -12,8 +12,8 @@ import re
 from pathlib import Path
 from core.atomic_io import atomic_write_json, atomic_write_text
-from core.auth import AuthManager, SetAdminResult
+from core.auth import AuthManager, RESERVED_USERNAMES, SetAdminResult, TOKEN_TTL
-from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
+from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, PASSWORD_MIN_LENGTH, SKILLS_DIR
 from src.rate_limiter import RateLimiter
 from src.settings_scrub import scrub_settings
 from src.settings import (
@@ -102,8 +102,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(429, "Too many requests — try again later")
        if auth_manager.is_configured:
            raise HTTPException(400, "Already configured")
-        if len(body.password) < 8:
+        if len(body.password) < PASSWORD_MIN_LENGTH:
-            raise HTTPException(400, "Password must be at least 8 characters")
+            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = await asyncio.to_thread(auth_manager.setup, body.username, body.password)
        if not ok:
            raise HTTPException(500, "Setup failed")
@@ -118,10 +122,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(400, "Run setup first")
        if not auth_manager.signup_enabled:
            raise HTTPException(403, "Registration is disabled. Ask an admin for an account.")
-        if len(body.password) < 8:
+        if len(body.password) < PASSWORD_MIN_LENGTH:
-            raise HTTPException(400, "Password must be at least 8 characters")
+            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = await asyncio.to_thread(auth_manager.create_user, body.username, body.password, is_admin=False)
        if not ok:
            raise HTTPException(409, "Username already taken")
@@ -144,6 +150,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                raise HTTPException(401, "Invalid 2FA code")
        # All checks passed — create session (password already verified above)
        token = await asyncio.to_thread(auth_manager.create_session_trusted, username)
        if not token:
            raise HTTPException(401, "Invalid credentials")
        cookie_kwargs = dict(
            key=SESSION_COOKIE,
            value=token,
@@ -153,7 +161,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            path="/",
        )
        if body.remember:
-            cookie_kwargs["max_age"] = 60 * 60 * 24 * 7  # 7 days
+            cookie_kwargs["max_age"] = TOKEN_TTL
        response.set_cookie(**cookie_kwargs)
        return {"ok": True, "username": username}
@@ -182,13 +190,18 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            pass
        return result
    @router.get("/policy")
    async def auth_policy():
        """Serve the value of ``auth_manager.policy()`` to the frontend."""
        policy = auth_manager.policy()
        return policy
    @router.post("/change-password")
    async def change_password(body: ChangePasswordRequest, request: Request):
        user = _get_current_user(request)
        if not user:
            raise HTTPException(401, "Not authenticated")
-        if len(body.new_password) < 8:
+        if len(body.new_password) < PASSWORD_MIN_LENGTH:
-            raise HTTPException(400, "Password must be at least 8 characters")
+            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
        current_token = request.cookies.get(SESSION_COOKIE)
        ok = await asyncio.to_thread(auth_manager.change_password, user, body.current_password, body.new_password)
        if not ok:
@@ -268,8 +281,12 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        user = _get_current_user(request)
        if not user or not auth_manager.is_admin(user):
            raise HTTPException(403, "Admin only")
-        if len(body.password) < 8:
+        if len(body.password) < PASSWORD_MIN_LENGTH:
-            raise HTTPException(400, "Password must be at least 8 characters")
+            raise HTTPException(400, f"Password must be at least {PASSWORD_MIN_LENGTH} characters")
        if len(body.username.strip()) < 1:
            raise HTTPException(400, "Username is required")
        if body.username.lower() in RESERVED_USERNAMES:
            raise HTTPException(403, "Username is reserved")
        ok = auth_manager.create_user(body.username, body.password, body.is_admin)
        if not ok:
            raise HTTPException(409, "Username already taken")
@@ -432,6 +449,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        except Exception as e:
            logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
        # direct personal RAG uploads live in per-owner directories and the
        # vector metadata also carries the username used for owner-filtered
        # search. Keep both in sync with the auth rename.
        try:
            from routes.personal_routes import rename_personal_upload_owner
            personal_docs_manager = getattr(request.app.state, "personal_docs_manager", None)
            if personal_docs_manager is not None:
                rag_manager = getattr(personal_docs_manager, "rag_manager", None)
                rename_personal_upload_owner(
                    old_username,
                    new_username,
                    personal_docs_manager=personal_docs_manager,
                    rag_manager=rag_manager,
                )
        except Exception as e:
            logger.warning("Failed to rename personal RAG upload owner references %s -> %s: %s", old_username, new_username, e)
        # skills: SKILL.md frontmatter carries owner: <username>; the usage
        # sidecar (_usage.json) keys entries as owner::skill-name. Both must
        # be updated or the renamed user's Skills panel goes empty.
@@ -23,6 +23,22 @@ from fastapi import HTTPException
 logger = logging.getLogger(__name__)
 # Strong references to in-flight fire-and-forget tasks scheduled from this
 # module. asyncio only keeps weak references to tasks created via
 # create_task, so without this the GC can collect a task mid-execution and
 # the background work (extraction, auto-naming) silently never runs.
 # Mirrors WebhookManager._spawn_tracked from src/webhook_manager.py.
 _BG_TASKS: set[asyncio.Task] = set()
 def _spawn_bg(coro) -> asyncio.Task:
    """Schedule a background task and hold a strong reference until it finishes."""
    task = asyncio.create_task(coro)
    _BG_TASKS.add(task)
    task.add_done_callback(_BG_TASKS.discard)
    return task
 # ── Data containers ────────────────────────────────────────────────────── #
@dataclass
@@ -159,17 +175,9 @@ async def auto_name_session(session_manager, sess):
            return
        owner = getattr(sess, "owner", None)
-        t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
+        t_url, t_model, t_headers = resolve_task_endpoint(
-        if not t_model:
+            sess.endpoint_url, sess.model, sess.headers, owner=owner
-            # If no task/utility model is configured at all, fall back to
+        )
            # the session's own model so auto-naming still works even on
            # minimal setups.
            from src.endpoint_resolver import resolve_endpoint
            _fallback = resolve_endpoint("default", owner=owner)
            if _fallback and _fallback[1]:
                t_url, t_model, t_headers = _fallback
            else:
                t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
        if not t_model:
            logger.debug("[auto-name] No model provided, skipping")
            return
@@ -576,7 +584,8 @@ async def build_chat_context(
    if not incognito:
        fire_message_event(request, webhook_manager, session_id, sess, message, compare_mode)
-    # Resolve user prefs
+    # Resolve owner-scoped prefs/context. Browser requests keep the cookie user;
    # bearer-token chat requests use the token owner instead of the "api" sentinel.
    user = effective_user(request)
    uprefs = load_prefs_for_user(user)
@@ -1112,7 +1121,7 @@ def run_post_response_tasks(
            )))
    if _extraction_jobs:
-        asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
+        _spawn_bg(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
    # Token accumulation
    if last_metrics:
@@ -1127,4 +1136,4 @@ def run_post_response_tasks(
    # Auto-name
    if needs_auto_name(sess.name):
-        asyncio.create_task(auto_name_session(session_manager, sess))
+        _spawn_bg(auto_name_session(session_manager, sess))
@@ -46,8 +46,12 @@ def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
    shell metacharacters in ``remoteHost`` is rejected with 400 rather than
    injected.
    """
-    host = validate_remote_host((task.get("remoteHost") or "").strip() or None) or ""
+    raw_host = task.get("remoteHost")
-    ssh_port = validate_ssh_port((task.get("sshPort") or "").strip() or None) or ""
+    raw_port = task.get("sshPort")
    host_value = str(raw_host).strip() if raw_host is not None else None
    port_value = str(raw_port).strip() if raw_port is not None else None
    host = validate_remote_host(host_value or None) or ""
    ssh_port = validate_ssh_port(port_value or None) or ""
    port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
    return host, port_flag
@@ -306,7 +310,10 @@ def setup_codex_routes(
    @router.post("/emails/draft-document")
    async def codex_email_draft_document(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
-        owner = _scope_owner_all(request, {"email:draft", "documents:write"})
+        owner = _scope_owner(request, EMAIL_DRAFT_SCOPES)
        docs_owner = _scope_owner_all(request, DOCS_WRITE_SCOPES)
        if docs_owner != owner:
            raise HTTPException(403, "API token owner mismatch")
        if documents_create_endpoint is None:
            raise HTTPException(503, "Documents integration is not available")
        from routes.document_routes import DocumentCreate
@@ -1284,6 +1284,11 @@ def setup_cookbook_routes() -> APIRouter:
        # LOCAL execution on a native-Windows host never uses tmux (detached
        # process path below), regardless of the UI-supplied platform.
        local_windows = IS_WINDOWS and not remote
        if is_windows and remote and "diffusion_server.py" in req.cmd:
            raise HTTPException(
                400,
                "Remote Windows Diffusers serving is not supported yet; use local Windows or a Linux remote server.",
            )
        if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port):
            return {
@@ -102,8 +102,11 @@ def _owner_session_filter(q, user):
    The owner backfill runs in init_db before the app serves requests, so
    by the time this filter is live there are no NULL-owner rows to leak;
-    we therefore match the owner strictly."""
+    we therefore match the owner strictly for authenticated callers."""
-    if user is None:
+    if not user:
        from src.auth_helpers import _auth_disabled
        if user == "" or _auth_disabled():
            return q
        return q.filter(False)
    return q.filter(Document.owner == user)
@@ -1332,6 +1332,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            if not pdf_path:
                raise HTTPException(404, f"Source PDF {upload_id} not found")
            # Fail fast with a clear 503 if the optional PyMuPDF dependency
            # is missing — fill_fields/stamp_annotations will otherwise
            # raise RuntimeError deep inside and bubble out as a 500.
            # Mirrors the convention in _load_pdf_viewer_fitz above.
            _load_pdf_viewer_fitz()
            values = parse_markdown_to_values(doc.current_content or "")
            out_path = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False).name
            _to_unlink.append(out_path)
@@ -2171,12 +2171,10 @@ def setup_email_routes():
        try:
            conn = sqlite3.connect(SCHEDULED_DB)
            conn.row_factory = sqlite3.Row
            # The MCP server can't easily set owner, so it stores '' — fall
            # back to those rows in addition to the caller's owner.
            rows = conn.execute(
                """SELECT id, to_addr, subject, body, created_at, account_id
                   FROM scheduled_emails
-                   WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
+                   WHERE status = 'agent_draft' AND owner = ?
                   ORDER BY created_at DESC""",
                (owner or "",),
            ).fetchall()
@@ -2197,7 +2195,7 @@ def setup_email_routes():
            cur = conn.execute(
                """UPDATE scheduled_emails
                   SET status = 'pending', send_at = ?
-                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
+                   WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
                (datetime.utcnow().isoformat(), sid, owner or ""),
            )
            conn.commit()
@@ -2218,7 +2216,7 @@ def setup_email_routes():
            conn = sqlite3.connect(SCHEDULED_DB)
            cur = conn.execute(
                """UPDATE scheduled_emails SET status = 'cancelled'
-                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
+                   WHERE id = ? AND status = 'agent_draft' AND owner = ?""",
                (sid, owner or ""),
            )
            conn.commit()
@@ -67,14 +67,6 @@ def _gallery_image_path(filename: str) -> Path:
        raise HTTPException(400, "Unsafe gallery filename")
    if safe_name != original:
        raise HTTPException(400, "Unsafe gallery filename")
    if not path.exists():
        cwd_root = (Path.cwd() / "data" / "generated_images").resolve()
        cwd_path = (cwd_root / safe_name).resolve()
        try:
            if os.path.commonpath([str(cwd_root), str(cwd_path)]) == str(cwd_root) and cwd_path.exists():
                return cwd_path
        except Exception:
            pass
    return path
@@ -273,65 +273,30 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
    async def api_audit_memories(request: Request, session: str = Form(None)):
        """Deduplicate and consolidate memories via LLM.
-        Uses the default model from settings, or falls back to a session's model.
+        Uses task/utility/default settings through the shared resolver, with
        the active session as fallback when no task or utility model is set.
        Returns before and after memory counts.
        """
        from routes.model_routes import _load_settings, _normalize_base, build_chat_url
        from core.database import ModelEndpoint
        import json as _json
        endpoint_url = model = None
        headers = {}
        # Try utility model from settings first — memory audit is a background
        # task and should prefer the lighter utility model over the main chat model.
        from src.task_endpoint import resolve_task_endpoint
        user = _owner(request)
-        t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
+        fallback_url = fallback_model = None
-        if t_url and t_model:
+        fallback_headers = None
-            endpoint_url, model, headers = t_url, t_model, t_headers
+        if session:
        else:
            # Fall back to default model if no task/utility model configured
            settings = _load_settings()
            ep_id = settings.get("default_endpoint_id", "")
            default_model = settings.get("default_model", "")
            if ep_id:
                db = SessionLocal()
                try:
                    ep = db.query(ModelEndpoint).filter(
                        ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
                    ).first()
                    if ep:
                        base = _normalize_base(ep.base_url)
                        endpoint_url = build_chat_url(base)
                        model = default_model
                        if not model and ep.models:
                            try:
                                models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
                                if models:
                                    model = models[0]
                            except Exception:
                                pass
                        if ep.api_key:
                            headers = {"Authorization": f"Bearer {ep.api_key}"}
                finally:
                    db.close()
            # Fall back to session model if no default configured
            if not endpoint_url and session:
            try:
                sess = session_manager.get_session(session)
-                    _assert_session_owner(sess, _owner(request))
+                _assert_session_owner(sess, user)
-                    endpoint_url = sess.endpoint_url
+                fallback_url = sess.endpoint_url
-                    model = sess.model
+                fallback_model = sess.model
-                    headers = sess.headers
+                fallback_headers = sess.headers
            except KeyError:
                pass
        endpoint_url, model, headers = resolve_task_endpoint(
            fallback_url, fallback_model, fallback_headers, owner=user
        )
        if not endpoint_url or not model:
            raise HTTPException(400, "No default model configured — set one in Settings")
        user = _owner(request)
        result = await audit_memories(
            memory_manager,
            memory_vector,
@@ -369,18 +334,28 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        model = None
        headers = {}
        user = _owner(request)
        if session:
            try:
                sess = session_manager.get_session(session)
-                _assert_session_owner(sess, _owner(request))
+                _assert_session_owner(sess, user)
                endpoint_url, model, headers = resolve_task_endpoint(
                    sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
                )
            except KeyError:
-                logger.warning("Session %s not found, falling back to utility endpoint", session)
+                sess = None
-                endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
+            except HTTPException as exc:
                if exc.status_code != 404:
                    raise
                sess = None
            if sess is None:
                logger.warning("Session %s not found or inaccessible, falling back to utility endpoint", session)
                endpoint_url, model, headers = resolve_endpoint("utility", owner=user)
            else:
-            endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
+                endpoint_url, model, headers = resolve_task_endpoint(
                    sess.endpoint_url, sess.model, sess.headers, owner=user
                )
        else:
            endpoint_url, model, headers = resolve_task_endpoint(owner=user)
        if not endpoint_url or not model:
            raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
@@ -5,6 +5,7 @@ import re
 import uuid
 import json
 import hashlib
 import ipaddress
 import socket
 import time as _time
 import logging
@@ -562,6 +563,8 @@ def _safe_build_models_url(base_url: str) -> str:
    """Build a /models URL without letting optional provider imports break probes."""
    try:
        return build_models_url(base_url)
    except ValueError:
        raise
    except Exception as exc:
        logger.debug("Model URL detection failed for %s: %s", base_url, exc)
        return f"{(base_url or '').rstrip('/')}/models"
@@ -633,7 +636,7 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
    try:
        t0 = _time.time()
-        r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
+        r = httpx.post(target_url, headers=h, json=payload, timeout=timeout, verify=llm_verify())
        latency = round((_time.time() - t0) * 1000)
        if r.is_success:
            return {"status": "ok", "latency_ms": latency}
@@ -659,13 +662,20 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
 # Hostnames / IP prefixes that indicate a local endpoint
 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1"}
-_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
+_PRIVATE_NETWORKS = (
-                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
+    ipaddress.ip_network("10.0.0.0/8"),
-                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
+    ipaddress.ip_network("172.16.0.0/12"),
-                     "172.30.", "172.31.", "192.168.")
+    ipaddress.ip_network("192.168.0.0/16"),
 )
 _TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
-_TAILSCALE_RE = re.compile(r"^100\.(6[4-9]|[7-9]\d|1[01]\d|12[0-7])\.")
+def _local_ip_literal(host: str) -> bool:
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        return False
    return any(ip in network for network in _PRIVATE_NETWORKS) or ip in _TAILSCALE_CGNAT
 def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
@@ -679,9 +689,7 @@ def _classify_endpoint(base_url: str, endpoint_kind: str = "auto") -> str:
        return "api"
    try:
        host = urlparse(base_url).hostname or ""
-        if host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES):
+        if host in _LOCAL_HOSTS or _local_ip_literal(host):
            return "local"
        if _TAILSCALE_RE.match(host):
            return "local"
    except Exception:
        pass
@@ -10,6 +10,7 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from core.database import SessionLocal, Note
 from core.middleware import INTERNAL_TOOL_USER
 from src.auth_helpers import require_user
 from src.constants import DATA_DIR
 from sqlalchemy.orm.attributes import flag_modified
@@ -582,7 +583,7 @@ def setup_note_routes(task_scheduler=None):
        return require_user(request) or None
    def _is_admin_or_single_user(request: Request, user: str | None) -> bool:
-        if user == "internal-tool":
+        if user == INTERNAL_TOOL_USER:
            return True
        if not user:
            # require_user() already admitted this request, which only happens
@@ -2,8 +2,9 @@
 """Routes for personal documents management."""
 import os
 import logging
 import shutil
 import uuid
-from typing import List, Tuple
+from typing import Any, Dict, List, Tuple
 from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
 from src.request_models import DirectoryRequest
 from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
@@ -18,13 +19,14 @@ UPLOADS_DIR = PERSONAL_UPLOADS_DIR
 logger = logging.getLogger(__name__)
-def _personal_upload_dir_for_owner(owner: str | None) -> str:
+def _personal_upload_dir_for_owner(owner: str | None, *, create: bool = True) -> str:
    """Return the per-owner upload directory used for direct RAG uploads."""
    owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
    upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
    base_abs = os.path.abspath(UPLOADS_DIR)
    if os.path.commonpath([upload_dir, base_abs]) != base_abs:
        raise ValueError("Unsafe upload owner path")
    if create:
        os.makedirs(upload_dir, exist_ok=True)
    return upload_dir
@@ -44,6 +46,87 @@ def _unique_personal_upload_path(upload_dir: str, original_name: str | None) ->
        raise ValueError("Unsafe upload filename")
    return file_path, filename, safe_name
 def _unique_existing_target(path: str) -> str:
    """Return a non-existing sibling path for rename collision handling."""
    if not os.path.exists(path):
        return path
    stem, ext = os.path.splitext(path)
    while True:
        candidate = f"{stem}-{uuid.uuid4().hex[:10]}{ext}"
        if not os.path.exists(candidate):
            return candidate
 def _remove_empty_tree(path: str) -> None:
    """Best-effort removal of empty directories under ``path``."""
    if not os.path.isdir(path):
        return
    for root, dirs, _files in os.walk(path, topdown=False):
        for dirname in dirs:
            candidate = os.path.join(root, dirname)
            try:
                os.rmdir(candidate)
            except OSError:
                pass
    try:
        os.rmdir(path)
    except OSError:
        pass
 def rename_personal_upload_owner(
    old_owner: str,
    new_owner: str,
    *,
    personal_docs_manager: Any = None,
    rag_manager: Any = None,
 ) -> Dict[str, Any]:
    """Relocate a renamed user's direct uploads and propagate the rename.

    Files under the old owner's upload directory are moved into the new
    owner's directory with their relative layout preserved; a name collision
    at the destination gets a unique sibling name. The managers are then
    notified through their optional ``rename_directory`` / ``rename_owner``
    hooks, which are skipped when the manager is ``None`` or has no callable
    hook of that name.

    Returns a dict with ``old_dir``, ``new_dir``, ``moved_files``,
    ``path_map`` (absolute source -> absolute target) and ``rag_result``
    (whatever ``rename_owner`` returned, or ``None`` if it was not called).
    """
    source_dir = _personal_upload_dir_for_owner(old_owner, create=False)
    dest_dir = _personal_upload_dir_for_owner(new_owner, create=False)
    relocated: Dict[str, str] = {}

    # Nothing to move when the old directory is absent or both owners map to
    # the same directory.
    if os.path.isdir(source_dir) and source_dir != dest_dir:
        os.makedirs(dest_dir, exist_ok=True)
        for current, _subdirs, filenames in os.walk(source_dir):
            relative = os.path.relpath(current, source_dir)
            if relative == ".":
                landing = dest_dir
            else:
                landing = os.path.join(dest_dir, relative)
            os.makedirs(landing, exist_ok=True)
            for name in filenames:
                src_path = os.path.abspath(os.path.join(current, name))
                wanted = os.path.abspath(os.path.join(landing, name))
                dst_path = _unique_existing_target(wanted)
                shutil.move(src_path, dst_path)
                relocated[src_path] = dst_path
        _remove_empty_tree(source_dir)

    docs_hook = (
        None
        if personal_docs_manager is None
        else getattr(personal_docs_manager, "rename_directory", None)
    )
    if callable(docs_hook):
        docs_hook(source_dir, dest_dir, path_map=relocated)

    rag_outcome = None
    rag_hook = None if rag_manager is None else getattr(rag_manager, "rename_owner", None)
    if callable(rag_hook):
        rag_outcome = rag_hook(
            old_owner,
            new_owner,
            path_map=relocated,
            path_prefixes=[(source_dir, dest_dir)],
        )

    return {
        "old_dir": source_dir,
        "new_dir": dest_dir,
        # Every moved file adds exactly one distinct source key.
        "moved_files": len(relocated),
        "path_map": relocated,
        "rag_result": rag_outcome,
    }
 def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
    """
    Setup personal documents related routes.
@@ -12,8 +12,10 @@ from typing import Optional
 from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from core.middleware import INTERNAL_TOOL_USER
 from src.endpoint_resolver import resolve_endpoint
 from src.auth_helpers import _auth_disabled, get_current_user
 from core.auth import RESERVED_USERNAMES
 from src.constants import DEEP_RESEARCH_DIR
 _SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
@@ -385,9 +387,9 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Launch a research job from the dedicated panel."""
        from src.auth_helpers import require_privilege
        user = require_privilege(request, "can_use_research")
-        if user == "internal-tool":
+        if user == INTERNAL_TOOL_USER:
            tool_owner = (request.headers.get("X-Odysseus-Owner") or "").strip()
-            if tool_owner and tool_owner not in {"internal-tool", "api", "demo", "system"}:
+            if tool_owner and tool_owner not in RESERVED_USERNAMES:
                auth_mgr = getattr(request.app.state, "auth_manager", None)
                if auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
                    try:
@@ -1004,6 +1004,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        """
        from src.llm_core import llm_call
        user = effective_user(request)
        single_user_mode = not user and _auth_disabled()
        user_sessions = session_manager.get_sessions_for_user(user)
        # Delete empty and throwaway sessions before sorting
@@ -1022,7 +1023,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        }
        _THROWAWAY_MAX_MESSAGES = 4  # only delete if <= this many messages
        try:
-            rows = db.query(DbSession).filter(DbSession.archived == False, DbSession.owner == user).limit(2000).all()
+            rows_q = db.query(DbSession).filter(DbSession.archived == False)
            if user:
                rows_q = rows_q.filter(DbSession.owner == user)
            elif not single_user_mode:
                rows_q = rows_q.filter(DbSession.owner == user)
            rows = rows_q.limit(2000).all()
            folder_map = {r.id: r.folder for r in rows}
            # Precompute per-session message counts in TWO aggregate queries
            # instead of 1–3 queries PER session — with many chats the per-row
@@ -1242,7 +1248,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        db = SessionLocal()
        try:
            for sid, folder_name in assignments.items():
-                db_session = db.query(DbSession).filter(DbSession.id == sid, DbSession.owner == user).first()
+                db_session_q = db.query(DbSession).filter(DbSession.id == sid)
                if user:
                    db_session_q = db_session_q.filter(DbSession.owner == user)
                elif not single_user_mode:
                    db_session_q = db_session_q.filter(DbSession.owner == user)
                db_session = db_session_q.first()
                if db_session:
                    db_session.folder = folder_name
                    db_session.updated_at = datetime.utcnow()
@@ -15,6 +15,7 @@ from collections import namedtuple
 from pathlib import Path
 from typing import Dict, Any
 from core.platform_compat import IS_APPLE_SILICON, which_tool
 from core.middleware import INTERNAL_TOOL_USER
 from src.optional_deps import prepare_optional_dependency_import
 # POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
@@ -55,7 +56,7 @@ def _require_admin(request: Request):
    # In-process tool loopback. The AuthMiddleware already validated the
    # internal token + loopback client before setting this marker, so
    # honour it here as admin-equivalent.
-    if user == "internal-tool":
+    if user == INTERNAL_TOOL_USER:
        return
    if not user or user == "api":
        raise HTTPException(403, "Admin only")
@@ -11,6 +11,7 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 from core.database import SessionLocal, ScheduledTask, TaskRun
 from core.middleware import INTERNAL_TOOL_USER
 from core.constants import internal_api_base
 from src.auth_helpers import get_current_user
 from src.constants import DATA_DIR, EMAIL_URGENCY_CACHE_DIR
@@ -427,7 +428,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
        # In-process tool-loopback marker — AuthMiddleware validated
        # the internal token + loopback client before stamping this,
        # so treat as admin-equivalent.
-        if user == "internal-tool":
+        if user == INTERNAL_TOOL_USER:
            return True
        try:
            from core.auth import AuthManager
@@ -103,9 +103,13 @@ def cmd_list(args) -> None:
    end = _parse_dt(args.end) if args.end else (start + timedelta(days=30))
    db = SessionLocal()
    try:
        # Overlap semantics, matching the web route (routes/calendar_routes.py)
        # and the recurring-expansion contract: an event is in the window when
        # it starts before the window end AND ends after the window start. This
        # includes multi-day / in-progress events that began before `start`.
        q = db.query(CalendarEvent).filter(
            CalendarEvent.dtstart >= start,
            CalendarEvent.dtstart < end,
            CalendarEvent.dtend > start,
        )
        if args.calendar:
            cal = db.query(CalendarCal).filter(CalendarCal.name == args.calendar).first()
@@ -130,6 +130,44 @@ def _lookup_bandwidth(system):
    return None
 def _canonical_cpu_backend(system):
    """Return the canonical CPU backend for cpu_only speed estimation.
    Normalizes CPU-architecture aliases separately from the GPU backend, and
    overrides GPU-only backends (CUDA/ROCm/Metal) so they do not inherit a
    discrete-GPU fallback constant when the model is actually running on CPU.
    """
    backend = (system.get("backend") or "").lower().strip()
    cpu_arch = (system.get("cpu_arch") or "").lower().strip()
    cpu_name = (system.get("cpu_name") or "").lower()
    gpu_name = (system.get("gpu_name") or "").lower()
    # Already-canonical CPU backends
    if backend in ("cpu_x86", "cpu_arm"):
        return backend
    # Raw CPU-architecture aliases. Treat plain "arm" as 32-bit ARM, not the
    # ARM64-class CPU fallback used for Apple Silicon/aarch64 machines.
    if backend in ("x86_64", "amd64", "i386", "i686"):
        return "cpu_x86"
    if backend in ("arm64", "aarch64"):
        return "cpu_arm"
    # Prefer an explicit CPU architecture field when present
    if cpu_arch:
        if cpu_arch in ("x86_64", "amd64", "x86", "i386", "i686"):
            return "cpu_x86"
        if cpu_arch in ("arm64", "aarch64"):
            return "cpu_arm"
    # Apple Silicon enters ranking as backend="metal"; its CPU path is ARM.
    if backend in ("metal", "mps", "apple") or "apple" in cpu_name or "apple" in gpu_name:
        return "cpu_arm"
    # Conservative default for CUDA/ROCm/discrete GPU backends and unknowns.
    return "cpu_x86"
 def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
    """Estimate tok/s. Uses active params for MoE (only active experts run per token).
@@ -147,6 +185,11 @@ def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
    bw = _lookup_bandwidth(system)
    backend = system.get("backend", "cpu_x86")
    # CPU-only inference must never inherit a GPU backend's fallback constant,
    # even if the detected system happens to report a CUDA/Metal/ROCm backend.
    if run_mode == "cpu_only":
        backend = _canonical_cpu_backend(system)
    if bw and run_mode in ("gpu", "cpu_offload"):
        bpp = QUANT_BYTES_PER_PARAM.get(quant, 0.5)
        model_gb = pb * bpp
@@ -320,7 +320,7 @@ def _detect_apple_silicon():
    # Only Apple Silicon (arm64) has a Metal GPU worth serving LLMs on; Intel
    # Macs fall through to the CPU path.
-    if "arm" not in arch and "aarch64" not in arch:
+    if _canonical_cpu_arch(arch) != "arm64":
        return None
    # Chip name, e.g. "Apple M4 Max" — carries the Pro/Max/Ultra variant that
@@ -503,6 +503,25 @@ def _get_cpu_count():
    return os.cpu_count() or 1
 def _canonical_cpu_arch(value):
    arch = str(value or "").lower().strip().replace("-", "_")
    if arch in ("x86_64", "amd64", "x64"):
        return "x86_64"
    if arch in ("i386", "i686", "x86"):
        return "x86"
    if arch in ("arm64", "aarch64"):
        return "arm64"
    if arch == "arm" or arch.startswith("armv"):
        return "arm"
    return arch
 def _get_cpu_arch():
    """Return the canonical CPU architecture for the current detection target.

    When ``_remote_host`` is set, the raw value comes from ``uname -m`` via
    ``_run`` (an empty string if that yields nothing); otherwise it comes
    from ``platform.machine()``. Either value is passed through
    ``_canonical_cpu_arch``.
    """
    raw_arch = (_run(["uname", "-m"]) or "") if _remote_host else platform.machine()
    return _canonical_cpu_arch(raw_arch)
 def _powershell_exe():
    """Pick the best PowerShell executable for LOCAL execution: prefer pwsh
    (PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute
@@ -528,6 +547,7 @@ def _detect_windows():
        $r.cpu_name = $cpu.Name
        $r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum
        $r.arch = $cpu.AddressWidth
        $r.cpu_arch = if ($env:PROCESSOR_ARCHITEW6432) { $env:PROCESSOR_ARCHITEW6432 } else { $env:PROCESSOR_ARCHITECTURE }
        # GPU detection via nvidia-smi (fastest) or WMI fallback
        try { 
            $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null
@@ -599,6 +619,7 @@ def _detect_windows():
            "available_ram_gb": d.get("avail_gb", 0),
            "cpu_cores": _as_int(d.get("cpu_cores"), 1),
            "cpu_name": _cpu_name,
            "cpu_arch": _canonical_cpu_arch(d.get("cpu_arch")),
            "has_gpu": bool(d.get("gpu_name")),
            "gpu_name": d.get("gpu_name"),
            "gpu_vram_gb": d.get("gpu_vram_gb"),
@@ -794,6 +815,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
    available_ram = round(_get_available_ram_gb(), 1)
    cpu_cores = _get_cpu_count()
    cpu_name = _get_cpu_name()
    cpu_arch = _get_cpu_arch()
    gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd()
@@ -803,6 +825,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "cpu_arch": cpu_arch,
            "has_gpu": True,
            "gpu_name": gpu_info["gpu_name"],
            "gpu_vram_gb": gpu_info["gpu_vram_gb"],
@@ -817,17 +840,13 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
            "unified_memory": gpu_info.get("unified_memory", False),
        }
    else:
-        if _remote_host:
+        backend = "cpu_arm" if cpu_arch == "arm64" else "cpu_x86"
            arch_out = _run(["uname", "-m"]) or ""
        else:
            import platform as _platform
            arch_out = _platform.machine().lower()
        backend = "cpu_arm" if "aarch64" in arch_out or "arm" in arch_out else "cpu_x86"
        result = {
            "total_ram_gb": total_ram,
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "cpu_arch": cpu_arch,
            "has_gpu": False,
            "gpu_name": None,
            "gpu_vram_gb": None,
@@ -16,8 +16,9 @@ sys.path.insert(0, BASE_DIR)
 from src.constants import (
    DATA_DIR, AUTH_FILE, UPLOAD_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR,
    TTS_CACHE_DIR, GENERATED_IMAGES_DIR, DEEP_RESEARCH_DIR, CHROMA_DIR,
-    RAG_DIR, MEMORY_VECTORS_DIR,
+    RAG_DIR, MEMORY_VECTORS_DIR, PASSWORD_MIN_LENGTH,
 )
 from core.auth import RESERVED_USERNAMES
 DIRS = [
    DATA_DIR,
@@ -59,15 +60,23 @@ def _prompt_admin_credentials():
    print("  (Press Enter to accept defaults)")
    print()
    while True:
        username = input("  Username [admin]: ").strip().lower()
        if not username:
            username = "admin"
        if username in RESERVED_USERNAMES:
            print(f"  '{username}' is a reserved username. Choose another.")
            continue
        break
    while True:
        password = getpass.getpass("  Password: ")
        if not password:
            print("  Password cannot be empty.")
            continue
        if len(password) < PASSWORD_MIN_LENGTH:
            print(f"  Password must be at least {PASSWORD_MIN_LENGTH} characters.")
            continue
        confirm = getpass.getpass("  Confirm password: ")
        if password != confirm:
            print("  Passwords don't match. Try again.")
@@ -93,8 +102,13 @@ def create_default_admin():
        password = os.getenv("ODYSSEUS_ADMIN_PASSWORD", "").strip()
        if username and password:
-            # Both provided via env — use them directly
+            # Both provided via env — validate before using
-            pass
+            if username in RESERVED_USERNAMES:
                print(f"  [error] ODYSSEUS_ADMIN_USER '{username}' is a reserved username")
                return "failed"
            if len(password) < PASSWORD_MIN_LENGTH:
                print(f"  [error] ODYSSEUS_ADMIN_PASSWORD must be at least {PASSWORD_MIN_LENGTH} characters")
                return "failed"
        elif sys.stdin.isatty() and not os.getenv("ODYSSEUS_SKIP_ADMIN_PROMPT"):
            # Interactive terminal — ask the user
            username, password = _prompt_admin_credentials()
@@ -0,0 +1,412 @@
 # Architecture Runtime Inventory
 > **Purpose**: Phase 0 planning baseline for codebase readability improvements (#4071).
 > **Parent issue**: [#4082](https://github.com/pewdiepie-archdaemon/odysseus/issues/4082)
 > **Last updated**: dev@b58af42 | 2026-06-16
 > **Status**: Draft — to be reviewed before follow-up slices open.
 > **Snapshot basis**: Importer / file / import-line counts are refreshed to `dev@b58af42` (2026-06-16) and are recomputable via the commands in §3.4. **Line counts** in §2.1 / §2.2 are a snapshot from an earlier baseline and drift as `dev` moves — recompute any of them with `wc -l <file>`. This inventory tracks structure and risk, not live metrics.
 This document maps the current runtime module structure, identifies high-risk boundaries, and recommends safe first refactor slices. It does **not** move files, change imports, or alter runtime behavior.
 ---
 ## 1. Current Structure Overview
 ### 1.1 Top-Level Layout
 ```
 odysseus/
 ├── app.py                    # FastAPI app entrypoint (1,145 lines)
 ├── conf/                     # Configuration (config.py, settings.py, settings_scrub.py)
 ├── src/                      # 95 flat .py files + 2 subdirectories
 │   ├── agent_tools/          # Tool helpers: document, filesystem, subprocess, web
 │   └── search/               # Search subsystem
 ├── routes/                   # 54 flat .py files — HTTP route handlers
 ├── core/                     # 10 files — database models, auth, middleware, session
 ├── mcp_servers/              # 5 files — MCP server implementations
 ├── scripts/                  # CLI tools and one-shot scripts
 ├── static/                   # Frontend HTML/CSS/JS
 ├── tests/                    # 583 test files (~54,800 lines)
 └── services/                 # (exists as needed)
 ```
 ### 1.2 Directory Flatness Metric
 | Directory | Flat `.py` Files | Subdirectories | Concern |
 |-----------|-----------------|----------------|---------|
 | `src/` | **95** | 2 (`agent_tools/`, `search/`) | No domain grouping; 95 files in one directory |
 | `routes/` | **54** | 0 | All route handlers in one flat directory |
 | `core/` | 10 | 0 | Manageable, but `database.py` is oversized |
 ---
 ## 2. Largest Runtime Modules
 ### 2.1 Python Backend
 | Rank | File | Lines | Classes | Functions | Risk |
 |------|------|-------|---------|-----------|------|
 | 1 | `src/tool_implementations.py` | **4,032** | 0 | ~48 | **HIGH** |
 | 2 | `routes/email_routes.py` | **3,245** | — | — | **MEDIUM** |
 | 3 | `routes/cookbook_routes.py` | **2,969** | — | — | **MEDIUM** |
 | 4 | `src/agent_loop.py` | **2,961** | 0 | ~24 | **HIGH** |
 | 5 | `src/task_scheduler.py` | **2,330** | — | 5 | MEDIUM |
 | 6 | `routes/model_routes.py` | **2,266** | — | — | MEDIUM |
 | 7 | `core/database.py` | **2,265** | 28 | ~59 helpers | **HIGH** |
 | 8 | `src/builtin_actions.py` | **2,262** | 2 | ~24 | MEDIUM |
 | 9 | `src/llm_core.py` | **2,164** | — | — | MEDIUM |
 | 10 | `mcp_servers/email_server.py` | 2,197 | — | — | LOW (separate process) |
 | 11 | `src/visual_report.py` | 1,918 | — | — | LOW |
 | 12 | `routes/gallery_routes.py` | 1,896 | — | — | LOW |
 | 13 | `src/ai_interaction.py` | 1,846 | — | — | MEDIUM |
 | 14 | `routes/document_routes.py` | 1,717 | — | — | LOW |
 | 15 | `routes/skills_routes.py` | 1,648 | — | — | LOW |
 **Heuristic**: Files > 2,000 lines with 20+ public symbols and many importers are the highest-risk splits. Files 1,000–2,000 lines are medium-risk if tightly coupled.
 ### 2.2 Frontend
 | File | Lines | Concern |
 |------|-------|---------|
 | `static/style.css` | **36,653** | Entire app CSS in one file (tracked separately in #2617) |
 | `static/js/document.js` | **9,776** | Single JS file for document functionality |
 | `static/js/slashCommands.js` | 6,498 | |
 | `static/js/settings.js` | 5,266 | |
 | `static/js/emailLibrary.js` | 5,217 | |
 | `static/js/notes.js` | 5,124 | |
 | `static/js/chat.js` | 4,985 | |
 | `static/app.js` | 4,090 | |
 **Note**: Frontend modularization is tracked separately in #2617 (CSS) and is not the focus of this Phase 0 inventory. Frontend is listed here for completeness but follow-up slices should target Python backend boundaries first.
 ---
 ## 3. Import Dependency Graph
 ### 3.1 Who Depends on `core/database.py`
 **102 files** import from `core.database` — this is the most depended-upon module:
 - All route handlers (`routes/*.py`)
 - Most `src/*.py` files
 - `core/session_manager.py`, `core/auth.py`
 - Multiple test files
 **Implication**: Any split of `core/database.py` is the highest-risk refactor. It should be tackled **last**, never first.
 ### 3.2 Who Depends on `src/tool_implementations.py`
 **17 files** import from `src.tool_implementations`:
 - `src/agent_loop.py`, `src/builtin_actions.py`, `src/tool_index.py`
 - `src/task_scheduler.py`, `src/tool_policy.py`
 - Various tests
 ### 3.3 Who Depends on `src/agent_loop.py`
 **22 files** import from `src.agent_loop`:
 - `src/tool_policy.py`, `src/teacher_escalation.py`, `src/bg_monitor.py`
 - `src/task_scheduler.py`
 - Multiple test files
 ### 3.4 Cross-Layer Import Violations
 **`src/` importing from `routes/`** (backwards dependency — domain logic depending on HTTP layer):
 ```
 src/tool_implementations.py ──→ routes/calendar_routes.py
 src/tool_implementations.py ──→ routes/cookbook_helpers.py
 src/tool_implementations.py ──→ routes/email_helpers.py
 src/tool_implementations.py ──→ routes/email_pollers.py
 src/tool_implementations.py ──→ routes/email_routes.py
 src/tool_implementations.py ──→ routes/model_routes.py
 src/tool_implementations.py ──→ routes/note_routes.py
 src/tool_implementations.py ──→ routes/prefs_routes.py
 ```
 > These are **runtime imports** (inside function bodies, not at module top), which mitigates circular import risk but indicates fuzzy layer boundaries. Function-level inline imports from the HTTP layer into business logic are a code smell.
 **Import-line counts by direction** (every import line matched by grep, including function-level imports):
 | Direction | Count | Notes |
 |-----------|-------|-------|
 | `routes/` → `src/` | **374** | Expected: HTTP handlers call domain logic |
 | `routes/` → `core/` | **126** | Expected: handlers access DB models |
 | `src/` → `routes/` | **31** | **Unexpected**: domain logic reaching into HTTP layer (direct grep of import lines referencing `routes/`) |
 | `src/` → `core/` | **106** | Acceptable but could be reduced with a data-access layer |
 > **How the metrics in this document are computed** — recompute against current `dev` before treating any count as authoritative (the tree drifts; these numbers are a snapshot, not a live value):
 > - `src/` flat `.py` files: `find src -maxdepth 1 -name '*.py' | wc -l`
 > - `tests/` test files: `find tests -name 'test_*.py' | wc -l`
 > - `core.database` importers: `grep -rlE '(from|import) +core\.database' --include='*.py' . | grep -v core/database.py | wc -l`
 > - `src.agent_loop` importers: `grep -rlE '(from|import) +src\.agent_loop' --include='*.py' . | grep -v src/agent_loop.py | wc -l`
 > - Cross-layer import lines: `grep -rhE '(from|import) +<pkg>' --include='*.py' <dir>/ | wc -l` (e.g. `(from|import) +routes` over `src/`)
 ---
 ## 4. Route Ownership Map
 Routes can be grouped into logical feature domains. Current flat structure obscures these boundaries:
 | Domain | Route Files | Total Lines | Review Complexity |
 |--------|-------------|-------------|-------------------|
 | **Email** | `email_routes.py`, `email_helpers.py`, `email_pollers.py` | 5,936 | HIGH — most complex domain |
 | **Chat / Agent** | `chat_routes.py`, `chat_helpers.py`, `shell_routes.py`, `codex_routes.py`, `skills_routes.py` | 6,365 | HIGH — core interaction surface |
 | **Cookbook** | `cookbook_routes.py`, `cookbook_helpers.py`, `cookbook_output.py` | 4,110 | MEDIUM |
 | **Model / LLM** | `model_routes.py`, `assistant_routes.py`, `copilot_routes.py` | 2,764 | MEDIUM |
 | **Calendar / Contacts** | `calendar_routes.py`, `contacts_routes.py` | 2,336 | MEDIUM |
 | **Documents** | `document_routes.py`, `document_helpers.py` | 1,954 | LOW |
 | **Auth** | `auth_routes.py`, `api_token_routes.py`, `device_flow.py` | 1,171 | LOW |
 | **Tasks** | `task_routes.py` (standalone) | 1,157 | LOW |
 | **Session** | `session_routes.py` (standalone) | 1,287 | LOW |
 | **Gallery** | `gallery_routes.py`, `gallery_helpers.py` | 1,896 | LOW |
 | **Memory** | `memory_routes.py` | — | LOW |
 | **Research** | `research_routes.py` | — | LOW |
 | **MCP** | `mcp_routes.py` | — | LOW |
 | **Notes** | `note_routes.py` | — | LOW |
 | **Other** | `prefs_routes.py`, `upload_routes.py`, `vault_routes.py`, `webhook_routes.py`, `workspace_routes.py`, `search_routes.py`, `history_routes.py`, `hwfit_routes.py`, `preset_routes.py`, `signature_routes.py`, `backup_routes.py`, `cleanup_routes.py`, `diagnostics_routes.py`, `embedding_routes.py`, `emoji_routes.py`, `font_routes.py`, `stt_routes.py`, `tts_routes.py`, `compare_routes.py`, `personal_routes.py`, `editor_draft_routes.py`, `admin_wipe_routes.py`, `chatgpt_subscription_routes.py` | 2,000+ | LOW individual, HIGH cumulative |
 ---
 ## 5. Tool Registry & Implementation Boundaries
 ### 5.1 Current Tool Architecture
 | Component | File | Lines | Role |
 |-----------|------|-------|------|
 | Tool schemas | `src/tool_schemas.py` | 1,392 | JSON Schema tool definitions (Duck-TypedDict) |
 | Tool index | `src/tool_index.py` | 542 | RAG-based tool retrieval from ChromaDB |
 | Tool implementations | `src/tool_implementations.py` | 4,032 | 33 `do_*` functions — all tool execution logic |
 | Tool security | `src/tool_security.py` | — | Owner-scoped tool blocking |
 | Tool policy | `src/tool_policy.py` | — | Guide-only directive, plan-mode disabled tools |
 | Tool utils | `src/tool_utils.py` | — | Shared tool helpers |
 ### 5.2 Tool Implementation Categories
 The 33 `do_*` functions in `tool_implementations.py` fall into natural domain groups — the basis for slice 1's split in §6.2:
 | Category | `do_*` functions | Count |
 |----------|------------------|-------|
 | **System / config** | `do_manage_skills`, `do_manage_tasks`, `do_manage_endpoints`, `do_manage_mcp`, `do_manage_webhooks`, `do_manage_tokens`, `do_manage_settings`, `do_api_call`, `do_app_api` | 9 |
 | **Cookbook / model serving** | `do_download_model`, `do_serve_model`, `do_list_served_models`, `do_stop_served_model`, `do_tail_serve_output`, `do_list_downloads`, `do_cancel_download`, `do_search_hf_models`, `do_adopt_served_model`, `do_list_cookbook_servers`, `do_list_serve_presets`, `do_serve_preset`, `do_list_cached_models` | 13 |
 | **Notes** | `do_manage_notes` | 1 |
 | **Calendar** | `do_manage_calendar` | 1 |
 | **Search** | `do_search_chats` | 1 |
 | **Research** | `do_manage_research`, `do_trigger_research` | 2 |
 | **Contacts** | `do_resolve_contact`, `do_manage_contact` | 2 |
 | **Vault** | `do_vault_search`, `do_vault_get`, `do_vault_unlock` | 3 |
 | **Image** | `do_edit_image` | 1 |
 | | **Total** | **33** |
 > Low-level tools (filesystem, subprocess, web fetch, document parsing) live in `src/agent_tools/`, **not** in `tool_implementations.py` — out of scope for this split.
 ---
 ## 6. Risk Assessment & Candidate Slice Ranking
 > **Candidate proposals, not a committed plan.** The rankings, package shapes (e.g. `src/pkg/`, `src/domain/`, `src/infra/`, `src/api/`), split ordering, and route-grouping strategy below are **options for maintainer discussion**. Per #4082/#4071, slice ownership and order are settled by maintainers before any follow-up PR. §1–§5 above are the factual current-state inventory.
 ### 6.1 Risk Scale
 | Level | Criteria |
 |-------|----------|
 | **LOW** | File has ≤3 importers AND ≤500 lines, OR is a pure refactor with clear boundaries |
 | **MEDIUM** | File has 4–15 importers OR 500–2,000 lines |
 | **HIGH** | File has 16+ importers OR >2,000 lines, OR has cross-layer import violations |
 ### 6.2 Ranked Split Candidates
 | Priority | Target | Risk | Rationale |
 |----------|--------|------|-----------|
 | **1** | `src/tool_implementations.py` → `src/tools/*.py` | **MEDIUM** | 4,032 lines → ~10 files by tool category. Already has natural boundaries. 17 importers, tracked in #3629. Use `__init__.py` shim to keep existing imports working. |
 | **2** | `routes/` → domain subdirectories (one domain per PR) | **MEDIUM** | 54 flat files. Done **one domain at a time** (e.g. a standalone PR for the email domain, then chat, …), not a broad reorganization — route modules carry helper imports, registration assumptions, and test import paths. |
 | **3** | `src/agent_loop.py` → `src/agent/loop.py` + submodules | **MEDIUM-HIGH** | 2,961 lines, 24 functions. Can extract prompt building, classification, verification, and runaway detection. Tracked in #3266. |
 | **4** | `src/` → `src/pkg/`, `src/domain/`, `src/infra/`, `src/api/` | **MEDIUM** | Structural reorganization. Split flat `src/` into layered packages. Must come after routes and tools are stable. |
 | **5** | `routes/email_*.py` consolidation | **LOW** | Already grouped by filename prefix. Low-risk cleanup within the email domain. |
 | **6** | `core/database.py` → `src/infra/database/models/*.py` | **HIGH** | 28 classes, 102 importers. Highest-risk split. Must be **last** in any sequence. Requires careful import shim strategy. |
 | **7** | Frontend CSS modularization | **MEDIUM** | 36,653 lines. Tracked in #2617. Separate timeline from backend work. |
 | **8** | Frontend JS modularization | **MEDIUM** | 9,776 lines in `document.js`. Introduce ES modules at minimum. |
 ### 6.3 Candidate First 3 Behavior-Preserving Slices
 **Slice 1: Split `tool_implementations.py`** (Lowest-risk high-impact)
 - Create `src/tools/` package with one file per tool category
 - Add `src/tools/__init__.py` re-exporting all symbols with current names
 - Update 17 importers to use new paths (can be deferred via shim)
 - Validation: `python -m pytest tests/ -x -q` + manual smoke test of tool execution
 - Reference: #3629
 **Slice 2: Group `routes/` by domain** (one domain per PR, not a broad sweep)
 Route modules carry helper imports, router registration assumptions, and test import paths, so this must be done **one domain at a time** rather than as a single reorganization PR. Example sequence (each its own PR):
 - PR 2a: move the **email** domain (`email_routes.py`, `email_helpers.py`, `email_pollers.py`) → `routes/email/` + shim
 - PR 2b: move the **chat/agent** domain → `routes/chat/` + shim
 - PR 2c: move the **cookbook** domain → `routes/cookbook/` + shim
 - …and so on per domain from §4
 Each PR: add `__init__.py` re-exporting old names, update `app.py` router imports, validation `python app.py` starts clean. **No behavior change** — pure file reorganization.
 **Slice 3: Extract `agent_loop.py` submodules** (Improve reviewability)
 - Move prompt assembly → `src/agent/prompt.py`
 - Move request classification → `src/agent/classifier.py`
 - Move sub-agent verification → `src/agent/verifier.py`
 - Move runaway detection → `src/agent/runaway.py`
 - Move context management → `src/agent/context.py`
 - Keep `src/agent/loop.py` as the main orchestration module
 - Validation: `python -m pytest tests/test_agent_loop.py tests/test_loop_breaker_runaway.py -v`
 ---
 ## 7. Safety Guardrails for Follow-Up Work
 Per maintainer guidance in #4082 and #4071:
 - [ ] **One domain/slice per PR** — never mix multiple reorganizations
 - [ ] **No behavior changes** mixed with file moves — pure reorganization only
 - [ ] **Keep compatibility shims** — `__init__.py` re-exports for all existing import paths
 - [ ] **Add or identify focused tests** before risky splits
 - [ ] **Do not start with `core/database.py`** or broad route movement unless this inventory shows a safe boundary
 - [ ] **Prefer small, reviewable slices** over large restructures
 - [ ] **No packaging/runtime/tooling migration** mixed into file moves
 - [ ] **No frontend framework migration** inside this stabilization lane
 - [ ] **Validate with `python -m compileall`** — every PR must pass CI checks
 - [ ] **Validate with `pytest`** — run the full test suite before opening each PR
 ---
 ## 8. Validation Commands
 Each follow-up PR should be verifiable with these commands before submission:
 ```bash
 # Syntax check — must pass with zero errors
 python -m compileall src/ routes/ core/ conf/
 # Full test suite — must match baseline pass rate
 python -m pytest tests/ -x -q
 # Import shim verification — existing import paths must still work
 python -c "from src.tool_implementations import do_search_chats; print('OK')"
 # App startup smoke test (if backend touched)
 timeout 5 python app.py 2>&1 | head -5 || true
 ```
 ---
 ## 9. Open Questions
 1. Is `#2538` (specs ground truth) the canonical behavior map baseline, and should this inventory be kept in sync with those specs once merged?
 2. Should route grouping follow the domain map proposed here, or is there a different taxonomy preferred by maintainers?
 3. For the `tool_implementations.py` split (#3629), is the tool categorization in §5.2 acceptable, or should it follow a different grouping?
 4. Should compatibility shims (`__init__.py`) be temporary (removed in a follow-up wave) or permanent?
 5. Should an ADR (Architecture Decision Record) document be started to track decisions made during this process?
 ---
 ## 10. Future Direction (NOT current state)
 The following are **future refactor targets** (candidate directions **pending maintainer agreement**, not committed), recorded here so this inventory does not imply they exist today. None of them are present in the current `dev` tree:
 - `main.py` — proposed rename of the `app.py` entrypoint. Today the app boots via `app.py`.
 - `src/agent/` — proposed package to hold `agent_loop.py` submodules (prompt/classifier/verifier/runaway/context). Today `agent_loop.py` is a single flat file in `src/`.
 - `src/infra/`, `src/domain/`, `src/pkg/`, `src/api/` — proposed layered reorganization of the flat `src/` directory (slice 4 in §6).
 These become real only when the corresponding slices land.
 ---
 ## Appendix A: File Listing
 ### `src/` (95 files — 61 shown; run `ls src/*.py` for the full list)
 ```
 agent_loop.py          tool_implementations.py   tool_schemas.py
 tool_index.py          tool_security.py          tool_policy.py
 tool_utils.py          builtin_actions.py        task_scheduler.py
 llm_core.py            model_context.py          model_discovery.py
 session_search.py      context_budget.py         context_compactor.py
 ai_interaction.py      action_intents.py         agent_runs.py
 app_helpers.py         app_initializer.py        config.py
 database.py            memory.py                 memory_provider.py
 secret_storage.py      prompt_security.py        url_security.py
 url_safety.py          rate_limiter.py           cleanup_service.py
 readiness.py           service_health.py         exceptions.py
 request_models.py      assistant_log.py          bg_monitor.py
 builtin_mcp.py         chat_helpers.py           chroma_client.py
 document_processor.py  embedding_lanes.py        deep_research.py
 research_handler.py    research_utils.py         personal_docs.py
 rag_manager.py         rag_singleton.py          topic_analyzer.py
 visual_report.py       youtube_handler.py        pdf_forms.py
 pdf_form_doc.py        pdf_runtime.py            caldav_writeback.py
 email_thread_parser.py text_helpers.py           user_time.py
 teacher_escalation.py  cookbook_serve_lifecycle.py
 chatgpt_subscription.py  mcp_manager.py
 ```
 ### `routes/` (54 files)
 ```
 __init__.py    _validators.py
 auth_routes.py              api_token_routes.py       device_flow.py
 chat_routes.py              chat_helpers.py           shell_routes.py
 codex_routes.py             skills_routes.py
 email_routes.py             email_helpers.py          email_pollers.py
 cookbook_routes.py          cookbook_helpers.py       cookbook_output.py
 model_routes.py             assistant_routes.py       copilot_routes.py
 calendar_routes.py          contacts_routes.py
 document_routes.py          document_helpers.py
 gallery_routes.py           gallery_helpers.py
 task_routes.py              session_routes.py
 note_routes.py              memory_routes.py          research_routes.py
 mcp_routes.py               search_routes.py          history_routes.py
 webhook_routes.py           workspace_routes.py       upload_routes.py
 vault_routes.py             prefs_routes.py           preset_routes.py
 signature_routes.py         personal_routes.py        hwfit_routes.py
 backup_routes.py            cleanup_routes.py         diagnostics_routes.py
 embedding_routes.py         emoji_routes.py           font_routes.py
 stt_routes.py               tts_routes.py             compare_routes.py
 editor_draft_routes.py      chatgpt_subscription_routes.py    admin_wipe_routes.py
 ```
 ### `core/` (10 files)
 ```
 __init__.py    constants.py    database.py    models.py
 auth.py        middleware.py   session_manager.py   exceptions.py
 atomic_io.py   platform_compat.py
 ```
 ---
 ## Appendix B: Key Import Relationships
 ```
 core/database.py  ←── 102 importers (routes/*, src/*, core/*, tests/*)
    ↑
    ├── routes/auth_routes.py
    ├── routes/email_routes.py
    ├── src/builtin_actions.py
    ├── src/task_scheduler.py
    ├── src/tool_implementations.py (inline)
    └── ...97 more
 src/tool_implementations.py  ←── 17 importers
    ↑
    ├── src/agent_loop.py
    ├── src/builtin_actions.py
    ├── src/tool_index.py
    ├── src/task_scheduler.py
    ├── src/tool_policy.py
    └── ...12 more (mostly tests)
 src/agent_loop.py  ←── 22 importers
    ↑
    ├── src/tool_policy.py
    ├── src/teacher_escalation.py
    ├── src/bg_monitor.py
    ├── src/task_scheduler.py
    └── 18 more (incl. tests)
 ```
@@ -267,6 +267,10 @@ _DOMAIN_RULES = {
 - Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
 - Use `manage_contact` to list, add, update, or delete contacts in the address book.
 - Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
    "integrations": """\
 ## Integration/API rules
 - To query or control a configured service integration (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, or any other registered service), use `api_call` with the integration name, HTTP method, path, and optional JSON body.
 - Do not use shell, curl, or `app_api` to reach a user's connected integration when `api_call` is available.""",
 }
 _DOMAIN_TOOL_MAP = {
@@ -277,9 +281,10 @@ _DOMAIN_TOOL_MAP = {
    "notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
    "ui": {"ui_control"},
    "sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
-    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
+    "files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace", "manage_bg_jobs"},
    "settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
    "contacts": {"resolve_contact", "manage_contact"},
    "integrations": {"api_call"},
 }
 def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -811,10 +816,25 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
        domains.add("sessions")
    if has(r"\b(file|folder|directory|repo|git|grep|find in files|read file|edit file|shell|terminal|bash|python)\b"):
        domains.add("files")
    # Managing detached bash jobs: "kill the background job", "stop the job",
    # "kill that job", "check the job output", "is the bg job done".
    if (has(r"\b(background|bg)\s+(jobs?|task)\b")
            or has(r"\b(kill|stop|cancel|terminate|check|tail|show|list)\b.{0,16}\bjobs?\b")
            or has(r"\bjobs?\b.{0,16}\b(output|status|done|finished|running)\b")):
        domains.add("files")
    if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
        domains.add("settings")
    if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
        domains.add("contacts")
    # API-integration intent — calling a configured service via the api_call
    # tool. Without this the #3794 repro ("Use the api_call tool to call Home
    # Assistant GET /api/states") matched no domain, classified as low-signal,
    # and the tool never reached the schema filter. Detect it explicitly so the
    # "integrations" domain seeds api_call deterministically (see
    # _DOMAIN_TOOL_MAP), independent of embedding retrieval.
    if has(r"\bapi[ _]call\b", r"\bintegrations?\b",
           r"\b(?:home ?assistant|miniflux|gitea|linkding|jellyfin)\b"):
        domains.add("integrations")
    low_signal = not continuation and not domains
    return {
@@ -843,8 +863,11 @@ def _recent_context_for_retrieval(messages: List[Dict], max_user: int = 3, max_c
        if isinstance(content, list):
            content = " ".join(b.get("text", "") for b in content if isinstance(b, dict))
        content = (content or "").strip()
-        # Skip injected tool-result envelopes — role=user but not human intent.
+        # Skip injected envelopes — role=user but not human intent. Tool results
-        if not content or content.startswith("[Tool execution results]"):
+        # are now wrapped via untrusted_context_message (metadata.trusted=False);
        # keep the legacy "[Tool execution results]" prefix for older histories.
        meta = msg.get("metadata") or {}
        if not content or meta.get("trusted") is False or content.startswith("[Tool execution results]"):
            continue
        collected.append(content)
        if len(collected) >= max_user:
@@ -1562,8 +1585,14 @@ def _append_tool_results(
        if round_reasoning:
            msg["reasoning_content"] = round_reasoning
        messages.append(msg)
        # Tool output (shell/python stdout, file reads, fetched pages, email
        # bodies, MCP results) is sourced from outside the server. Wrap it as
        # untrusted data so prompt-injection inside a tool result is treated as
        # data, not instructions — same hardening as skills (#788) and the
        # web/RAG context. THREAT_MODEL.md lists tool output as a surface that
        # must go through untrusted_context_message.
        messages.append(
-            {"role": "user", "content": f"[Tool execution results]\n\n{tool_output_text}"}
+            untrusted_context_message("tool execution results", tool_output_text)
        )
@@ -22,6 +22,9 @@ from .subprocess_tools import BashTool, PythonTool
 from .web_tools import WebSearchTool, WebFetchTool
 from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
 from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
 from .model_interaction_tools import ChatWithModelTool, AskTeacherTool, ListModelsTool
 from .bg_job_tools import ManageBgJobsTool
 from .session_tools import CreateSessionTool, ListSessionsTool, SendToSessionTool, ManageSessionTool
 TOOL_HANDLERS = {
    "bash": BashTool().execute,
@@ -40,6 +43,14 @@ TOOL_HANDLERS = {
    "suggest_document": SuggestDocumentTool().execute,
    "manage_documents": ManageDocumentTool().execute,
    "get_workspace": GetWorkspaceTool().execute,
    "chat_with_model": ChatWithModelTool().execute,
    "ask_teacher": AskTeacherTool().execute,
    "list_models": ListModelsTool().execute,
    "manage_bg_jobs": ManageBgJobsTool().execute,
    "create_session": CreateSessionTool().execute,
    "list_sessions": ListSessionsTool().execute,
    "send_to_session": SendToSessionTool().execute,
    "manage_session": ManageSessionTool().execute,
 }
 # ---------------------------------------------------------------------------
@@ -52,7 +63,7 @@ PYTHON_TIMEOUT = 30
 # Tool types that trigger execution
 TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
-             "grep", "glob", "ls", "get_workspace",
+             "grep", "glob", "ls", "get_workspace", "manage_bg_jobs",
             "create_document", "update_document", "edit_document",
             "search_chats",
             "chat_with_model", "create_session", "list_sessions",
@@ -0,0 +1,98 @@
 """Agent tool to inspect and control detached background `bash` jobs.
 `bash` blocks prefixed with a `#!bg` marker run detached via `src.bg_jobs`; the
 agent is auto-re-invoked with the output when they finish. This tool covers the
 gaps in that flow: list the jobs in the current chat, read a still-running job's
 output on demand, and kill a runaway job instead of waiting out its max-runtime.
 Registry tool (`TOOL_HANDLERS["manage_bg_jobs"]`). Jobs are scoped to the chat
 that launched them, so every action requires the caller's `session_id` and a job
 from another session is treated as not found.
 """
 import json
 import time
 from typing import Any, Dict, List
 # Accepted spellings for each action. The requested action is stripped and
 # lower-cased before being looked up in these sets.
 # Aliases that list the background jobs of the calling chat.
 _LIST_ACTIONS = {"list", "ls", "jobs"}
 # Aliases that show one job's status, command and captured output; they are
 # all handled by the same branch.
 _OUTPUT_ACTIONS = {"output", "get", "read", "tail", "status", "show"}
 # Aliases that kill a job that is still running.
 _KILL_ACTIONS = {"kill", "stop", "cancel", "terminate"}
 def _age(rec: Dict[str, Any]) -> str:
    start = rec.get("started_at")
    if not start:
        return "?"
    secs = int(time.time() - start)
    if secs < 60:
        return f"{secs}s"
    if secs < 3600:
        return f"{secs // 60}m"
    return f"{secs // 3600}h{(secs % 3600) // 60}m"
 def _status_label(rec: Dict[str, Any]) -> str:
    status = rec.get("status", "?")
    if rec.get("killed"):
        return "killed"
    if rec.get("timed_out"):
        return "timed out"
    if rec.get("died"):
        return "died"
    if status in ("done", "failed"):
        return f"{status} (exit {rec.get('exit_code')})"
    return status
 def _row(rec: Dict[str, Any]) -> str:
    """Render one job record as a single listing line.
    Format: "[<id>] <status label> | <age> | <first line of command>", with the
    command capped at 80 characters.
    """
    # splitlines() returns [] for an empty/blank command, so indexing [0]
    # directly would raise IndexError and break the whole listing.
    cmd_lines = str(rec.get("command") or "").strip().splitlines()
    cmd = cmd_lines[0][:80] if cmd_lines else ""
    return f"[{rec.get('id')}] {_status_label(rec)} | {_age(rec)} | {cmd}"
 class ManageBgJobsTool:
    """Handler for the `manage_bg_jobs` tool: list, read, or kill background jobs.
    Every action is scoped to the caller's chat: `ctx["session_id"]` is required
    and a job whose `session_id` differs is reported as not found.
    """
    @staticmethod
    def _first_line(command: Any, limit: int = 80) -> str:
        """Return the first line of `command` capped at `limit` chars ("" when blank)."""
        lines = str(command or "").strip().splitlines()
        return lines[0][:limit] if lines else ""
    async def execute(self, content: str, ctx: dict) -> dict:
        """Run one `manage_bg_jobs` action.
        Args:
            content: JSON object with `action` (defaults to "list") and, for
                output/kill actions, `job_id` (or `id`). Anything that is not a
                JSON object is treated as an empty request.
            ctx: call context; `session_id` identifies the calling chat.
        Returns:
            {"output": str, "exit_code": 0} on success, or
            {"error": str, "exit_code": 1} on failure.
        """
        session_id = ctx.get("session_id")
        raw = (content or "").strip()
        try:
            args = json.loads(raw) if raw else {}
        except (ValueError, TypeError):
            args = {}
        if not isinstance(args, dict):
            args = {}
        # `or "list"` also covers an explicit null action, which str() would
        # otherwise turn into the unknown action "none".
        action = str(args.get("action") or "list").strip().lower()
        job_id = str(args.get("job_id") or args.get("id") or "").strip()
        if not session_id:
            return {"error": "manage_bg_jobs: no active chat session; background jobs are scoped to a chat.", "exit_code": 1}
        wants_list = action in _LIST_ACTIONS
        wants_kill = action in _KILL_ACTIONS
        if not (wants_list or wants_kill or action in _OUTPUT_ACTIONS):
            return {"error": f"manage_bg_jobs: unknown action '{action}'. Use list, output, or kill.", "exit_code": 1}
        if not wants_list and not job_id:
            return {"error": f"manage_bg_jobs: action '{action}' requires a job_id (see action='list').", "exit_code": 1}
        # Imported only once the request is known to be valid.
        from src import bg_jobs
        if wants_list:
            # sorted() builds a new list, so the list returned by bg_jobs is
            # never reordered in place.
            jobs: List[Dict[str, Any]] = sorted(
                bg_jobs.list_for_session(session_id) or [],
                key=lambda r: r.get("started_at") or 0,
                reverse=True,
            )
            if not jobs:
                return {"output": "No background jobs in this chat.", "exit_code": 0}
            lines = "\n".join(_row(r) for r in jobs)
            return {"output": f"{len(jobs)} background job(s):\n{lines}", "exit_code": 0}
        rec = bg_jobs.get(job_id)
        # Scope: only the chat that launched a job may see or control it.
        if rec is None or rec.get("session_id") != session_id:
            return {"error": f"manage_bg_jobs: no background job '{job_id}' in this chat.", "exit_code": 1}
        if wants_kill:
            if rec.get("status") != "running":
                return {"output": f"Job `{job_id}` already {_status_label(rec)}; nothing to kill.", "exit_code": 0}
            killed = bg_jobs.kill(job_id)
            # Fall back to the record fetched above when kill() returns nothing,
            # and tolerate an empty command instead of raising IndexError.
            summary = self._first_line((killed or rec).get("command"))
            return {"output": f"Killed background job `{job_id}` ({summary}).", "exit_code": 0}
        out = rec.get("output") or "(no output yet)"
        return {
            "output": f"Job `{job_id}` [{_status_label(rec)}, {_age(rec)}]\nCommand: {rec.get('command')}\n\nOutput:\n{out}",
            "exit_code": 0,
        }
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import os
 import re
 import difflib
 import fnmatch
 import shutil
@@ -16,6 +17,31 @@ _CODENAV_SKIP_DIRS = frozenset({
 _CODENAV_MAX_HITS = 200
 _CODENAV_MAX_LINE = 400
 def _glob_to_regex(pat: str) -> "re.Pattern":
    """Translate a forward-slash glob (**, *, ?) into a compiled regex.
    `**/` matches zero or more complete directories.
    `*` matches within a single path segment (does not cross /).
    """
    i, n, out = 0, len(pat), []
    while i < n:
        if pat[i : i + 3] == "**/":
            out.append("(?:[^/]+/)*")
            i += 3
        elif pat[i : i + 2] == "**":
            out.append(".*")
            i += 2
        elif pat[i] == "*":
            out.append("[^/]*")
            i += 1
        elif pat[i] == "?":
            out.append("[^/]")
            i += 1
        else:
            out.append(re.escape(pat[i]))
            i += 1
    return re.compile("".join(out))
 def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
    if old == new:
        return None
@@ -259,23 +285,38 @@ class GlobTool:
            return {"error": f"glob: {e}", "exit_code": 1}
        def _glob():
-            from pathlib import Path
+            base = os.path.abspath(root)
-            base = Path(root)
+            if not os.path.isdir(base):
            if not base.is_dir():
                return None, f"glob: {root}: not a directory"
            norm_pat = pattern.replace("\\", "/")
            # Fast path: literal pattern (no wildcards) → direct path lookup.
            if not any(c in norm_pat for c in "*?["):
                cand = os.path.normpath(os.path.join(base, norm_pat))
                if os.path.exists(cand):
                    return [cand], None
                # Literal not at exact path — fall through to walk so
                # e.g. "foo.py" still matches at any depth (like rglob).
            # Compile glob to regex: * stays within one segment, **/ spans dirs.
            regex = _glob_to_regex(norm_pat)
            matched = []
            cap = _CODENAV_MAX_HITS * 5
            try:
-                for p in base.rglob(pattern):
+                for dp, dns, fns in os.walk(base):
-                    if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
+                    # Prune skipped dirs before descending (unlike rglob which
-                        continue
+                    # descends first then filters — fatal on large node_modules).
                    dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
                    for name in fns + dns:
                        full = os.path.join(dp, name)
                        rel = os.path.relpath(full, base).replace(os.sep, "/")
                        if regex.fullmatch(rel) or regex.fullmatch(name):
                            try:
-                        mtime = p.stat().st_mtime
+                                mtime = os.stat(full).st_mtime
                            except OSError:
                                mtime = 0
-                    matched.append((mtime, str(p)))
+                            matched.append((mtime, full))
-                    if len(matched) > _CODENAV_MAX_HITS * 5:
+                    if len(matched) > cap:
                        break
-            except (OSError, ValueError) as _e:
+            except OSError as _e:
                return None, f"glob: {_e}"
            matched.sort(key=lambda t: t[0], reverse=True)
            return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
@@ -0,0 +1,208 @@
 """model_interaction_tools.py - agent tools for talking to other models.
 Owns the model-interaction tool implementations (chat_with_model, ask_teacher,
 list_models) and their handler classes, registered in ``TOOL_HANDLERS``. Part
 of the tool -> registry migration (#3629): the implementations were moved here
 out of ``src.ai_interaction`` so dispatch flows through the registry instead of
 the elif chain / dispatch_ai_tool in tool_execution.py.
 Shared helpers that still live in ``src.ai_interaction`` and are used by tools
 not yet migrated (``_resolve_model``, ``AI_CHAT_TIMEOUT``) are imported lazily
 inside the functions to avoid an import cycle at module load.
 """
 import logging
 from typing import Dict, Optional
 logger = logging.getLogger(__name__)
 # System prompt sent as the first message of every ask_teacher call; it frames
 # the target model as a mentor answering a less capable model.
 _TEACHER_SYSTEM_PROMPT = (
    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
    "Provide clear, actionable guidance:\n"
    "1. Brief analysis of the problem\n"
    "2. Recommended approach (step by step)\n"
    "3. Key things to watch out for\n\n"
    "Be concise and practical. No preamble."
 )
 async def chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Forward a single user message to a named model and return its reply.
    Content format:
      Line 1: model_name (or model_name@endpoint_name)
      Line 2+: the message to send
    Returns {"model": ..., "response": ...} on success (the response is cut to
    10000 characters), or {"error": ...} when the input is malformed, the model
    cannot be resolved, or the call fails. `session_id` is accepted but unused.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async
    # Everything before the first newline is the model spec, the rest is the message.
    head, _, tail = content.strip().partition("\n")
    model_spec = head.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    message = tail.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    max_chars = 10000
    try:
        reply = await llm_call_async(
            url,
            model,
            [{"role": "user", "content": message}],
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Keep oversized replies from flooding the caller's context.
        if len(reply) > max_chars:
            reply = reply[:max_chars] + "\n... (truncated)"
        return {"model": model, "response": reply}
    except Exception as exc:
        logger.error(f"chat_with_model failed: {exc}")
        return {"error": f"Failed to get response from {model_spec}: {exc}"}
 async def ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a problem description to a teacher model and return its guidance.
    Content format:
      Line 1: model_name (or 'auto')
      Line 2+: the problem description
    With 'auto' (or an empty first line) the model comes from the
    `teacher_model` setting. Returns {"model", "response", "teacher": True} on
    success (the response is cut to 8000 characters), or {"error": ...}.
    `session_id` is accepted but unused.
    """
    from src.ai_interaction import _resolve_model, AI_CHAT_TIMEOUT
    from src.llm_core import llm_call_async
    from src.settings import get_setting
    head, _, tail = content.strip().partition("\n")
    model_spec = head.strip()
    problem = tail.strip()
    if not problem:
        return {"error": "No problem description provided"}
    if model_spec.lower() in ("auto", ""):
        # No explicit model requested: use the configured teacher, if any.
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    max_chars = 8000
    conversation = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        reply = await llm_call_async(
            url,
            model,
            conversation,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        if len(reply) > max_chars:
            reply = reply[:max_chars] + "\n... (truncated)"
        return {"model": model, "response": reply, "teacher": True}
    except Exception as exc:
        logger.error(f"ask_teacher failed: {exc}")
        return {"error": f"Teacher call failed ({model_spec}): {exc}"}
 async def list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List all available models across configured endpoints.
    Content = optional filter keyword (case-insensitive; matched against the
    model id and the endpoint name).
    Enabled endpoints are read from the database, restricted to `owner` when
    one is given. Anthropic endpoints report the static ANTHROPIC_MODELS list;
    other endpoints are queried at their models URL, falling back to the
    endpoint's cached model list when no such URL can be built. An endpoint
    whose query fails is listed as "(endpoint offline)"; one whose runtime
    config cannot be resolved is skipped.
    Returns {"results": <markdown text>} or {"error": <message>}.
    `session_id` is accepted but unused.
    """
    import asyncio
    import json
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter
    from src.endpoint_resolver import resolve_endpoint_runtime, build_headers, build_models_url
    keyword = content.strip().lower() if content.strip() else None
    db = SessionLocal()
    try:
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()
        if not endpoints:
            return {"results": "No enabled model endpoints configured."}
        result_lines = []
        total_models = 0
        for ep in endpoints:
            try:
                base, api_key = resolve_endpoint_runtime(ep, owner=owner)
            except Exception:
                continue
            provider = _detect_provider(base)
            headers = build_headers(api_key, base)
            model_ids = []
            if provider == "anthropic":
                model_ids = list(ANTHROPIC_MODELS)
            else:
                try:
                    models_url = build_models_url(base)
                    if models_url:
                        # httpx.get is synchronous; run it in a worker thread so
                        # a slow endpoint (up to 5s each) does not stall the
                        # event loop for every other coroutine.
                        r = await asyncio.to_thread(httpx.get, models_url, headers=headers, timeout=5)
                        r.raise_for_status()
                        data = r.json()
                        # First try a top-level "data" list of {"id": ...} entries.
                        model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                        if not model_ids:
                            # Otherwise a "models" list keyed by "name" or "model".
                            model_ids = [
                                m.get("name") or m.get("model")
                                for m in (data.get("models") or [])
                                if m.get("name") or m.get("model")
                            ]
                    else:
                        model_ids = json.loads(ep.cached_models or "[]")
                except Exception:
                    model_ids = ["(endpoint offline)"]
            if keyword:
                model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
            if model_ids:
                result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                for mid in model_ids:
                    result_lines.append(f"  - `{mid}`")
                    total_models += 1
        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Handler classes registered in TOOL_HANDLERS
 # ---------------------------------------------------------------------------
 class ChatWithModelTool:
    """Adapter that exposes chat_with_model through the (content, ctx) handler signature."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call chat_with_model with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        owner = ctx.get("owner")
        return await chat_with_model(content, session_id, owner=owner)
 class AskTeacherTool:
    """Adapter that exposes ask_teacher through the (content, ctx) handler signature."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call ask_teacher with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        owner = ctx.get("owner")
        return await ask_teacher(content, session_id, owner=owner)
 class ListModelsTool:
    """Adapter that exposes list_models through the (content, ctx) handler signature."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Call list_models with the session id and owner taken from `ctx`."""
        session_id = ctx.get("session_id")
        owner = ctx.get("owner")
        return await list_models(content, session_id, owner=owner)
@@ -0,0 +1,464 @@
 """session_tools.py - agent tools for AI-to-AI session management.
 Owns create_session, list_sessions, send_to_session and manage_session, moved
 out of src.ai_interaction as part of the tool -> registry migration (#3629), and
 their handler classes registered in TOOL_HANDLERS.
 The session manager is a runtime-set singleton in src.ai_interaction, so each
 function fetches it via get_session_manager() (imported here); _resolve_model and
 AI_CHAT_TIMEOUT are reused from there too.
 """
 import json
 import logging
 import uuid
 from typing import Dict, Optional
 from src.ai_interaction import get_session_manager, _resolve_model, AI_CHAT_TIMEOUT
 logger = logging.getLogger(__name__)
 async def create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a chat session from a two-line spec.
    Content format:
      Line 1: session name
      Line 2: model_name (or model_name@endpoint_name)
    Returns a dict with session_id, name, model and endpoint_url on success,
    or a dict holding a single "error" key when validation or creation fails.
    """
    manager = get_session_manager()
    if not manager:
        return {"error": "Session manager not available"}
    spec_lines = content.strip().split("\n")
    if len(spec_lines) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}
    name, model_spec = spec_lines[0].strip(), spec_lines[1].strip()
    if not name:
        return {"error": "Session name cannot be empty"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    # Short id: the first 8 characters of a random UUID4.
    sid = str(uuid.uuid4())[:8]
    try:
        manager.create_session(
            session_id=sid, name=name, endpoint_url=url, model=model, rag=False, owner=owner,
        )
        # Keep the resolved headers on the session so later calls can reuse them.
        created = manager.get_session(sid)
        if created and headers:
            created.headers = headers
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            # Best-effort notification: a failed event must not fail the creation.
            logger.debug("session_created event dispatch failed", exc_info=True)
    except Exception as exc:
        logger.error(f"create_session failed: {exc}")
        return {"error": f"Failed to create session: {exc}"}
    return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
 async def list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions, most-recently-active first.
    Output includes a relative "last active" timestamp per row so the
    agent can answer "open my last chat" without guessing from titles.
    The most-recent session is always first in the list.
    Content = optional filter keyword (case-insensitive substring match on
    the session name); None or blank content means "no filter".
    Returns {"results": <markdown text>} on success (at most 50 rows are
    rendered), or {"error": <message>} when the session manager is missing
    or anything inside the listing raises.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}
    # Tolerate a missing payload instead of raising on None.strip().
    keyword = (content or "").strip().lower() or None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone
        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()
        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)
        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
            rows.append((ts, sid, sess))
        def _sort_key(row):
            # Rows without a timestamp get rank 0 so they sink to the bottom
            # under reverse=True. Aware timestamps are converted to naive UTC
            # so they never get compared against a naive value (which raises
            # TypeError and used to fail the whole listing).
            ts = row[0]
            if not ts:
                return (0, datetime.min)
            if ts.tzinfo is not None:
                ts = ts.astimezone(timezone.utc).replace(tzinfo=None)
            return (1, ts)
        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        rows.sort(key=_sort_key, reverse=True)
        def _rel(ts):
            # Render a timestamp as a short "time ago" label.
            if not ts:
                return 'never'
            try:
                now = datetime.now(timezone.utc)
                if ts.tzinfo is None:
                    # Naive timestamps are compared against naive UTC "now".
                    now = now.replace(tzinfo=None)
                diff = (now - ts).total_seconds()
            except Exception:
                return 'unknown'
            if diff < 60: return 'just now'
            if diff < 3600: return f'{int(diff / 60)}m ago'
            if diff < 86400: return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')
        lines = []
        for i, (ts, sid, sess) in enumerate(rows):
            if i >= 50:
                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
                break
            # Escape square brackets so the name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
 async def send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Post a message into an existing session and return the model's reply.
    Content format:
      Line 1: session_id
      Line 2+: message
    On success returns session_id, session_name and response (the returned
    response is cut to 10000 characters; the full text is what gets stored
    on the session). On failure returns a dict with an "error" key.
    """
    manager = get_session_manager()
    from src.llm_core import llm_call_async
    from core.models import ChatMessage
    if not manager:
        return {"error": "Session manager not available"}
    first_line, newline, remainder = content.strip().partition("\n")
    if not newline:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = first_line.strip()
    message = remainder.strip()
    target = manager.get_session(target_sid)
    if not target:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: another user's session is reported as "not found".
    session_owner = getattr(target, "owner", None)
    if owner and session_owner and session_owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}
    try:
        # Prior history plus the new user turn forms the prompt.
        prompt = target.get_context_messages()
        prompt.append({"role": "user", "content": message})
        reply = await llm_call_async(
            target.endpoint_url, target.model, prompt,
            headers=target.headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Record both sides of the exchange on the session.
        target.add_message(ChatMessage("user", message))
        target.add_message(ChatMessage("assistant", reply))
        # Only the tool output is shortened.
        shown = reply[:10000] + "\n... (truncated)" if len(reply) > 10000 else reply
        return {
            "session_id": target_sid,
            "session_name": target.name,
            "response": shown,
        }
    except Exception as exc:
        logger.error(f"send_to_session failed: {exc}")
        return {"error": f"Failed to send to session: {exc}"}
 async def manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Manage sessions: list, switch, rename, archive, delete, important, truncate, fork.
    Content is either a JSON object or the legacy line format.
    JSON format:
      {"action": ..., "session_id": ..., "value": ...}
      ("session"/"id" are accepted for the target; "name"/"new_name"/"title"/
      "keep_count" are accepted for the value; "filter" is used by `list`.)
    Line format:
      Line 1: action (list|switch|rename|archive|unarchive|delete|important|unimportant|truncate|fork)
      Line 2: target session_id (or "current" to use the active session)
      Line 3: action-specific param (new name for rename, keep_count for truncate/fork)
    `open`, `select` and `view` are aliases of `switch`.
    When owner is not None every lookup is restricted to that owner's rows.
    Returns a dict describing the result (with a "results" text), or a dict
    holding an "error" key.
    """
    _session_manager = get_session_manager()
    if not _session_manager:
        return {"error": "Session manager not available"}
    from src.database import SessionLocal, Session as DbSession
    # Accept BOTH the structured JSON args the tool schema advertises
    # ({action, session_id, value}) AND the legacy line-based format
    # (line1=action, line2=session_id, line3=value). Native function-calling
    # models send JSON; fenced-block callers send lines. Previously only the
    # line format was parsed, so a model that followed the schema (JSON) got
    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
    _raw = (content or "").strip()
    action = ""
    target_sid = ""
    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
    _list_filter = ""
    _parsed = None
    if _raw.startswith("{"):
        try:
            _parsed = json.loads(_raw)
        except Exception:
            # Not valid JSON after all: fall through to the line parser.
            _parsed = None
    if isinstance(_parsed, dict):
        action = str(_parsed.get("action") or "").strip().lower()
        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
        _v = _parsed.get("value")
        if _v is None:
            _v = (_parsed.get("name") or _parsed.get("new_name")
                  or _parsed.get("title") or _parsed.get("keep_count"))
        value = None if _v is None else str(_v).strip()
        _list_filter = str(_parsed.get("filter") or "").strip()
    else:
        lines = _raw.split("\n")
        if not lines or not lines[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = lines[0].strip().lower()
        target_sid = lines[1].strip() if len(lines) >= 2 else ""
        value = lines[2].strip() if len(lines) >= 3 else None
        _list_filter = "\n".join(lines[1:]).strip()
    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
    # `list` alias - dispatch to list_sessions so the agent's natural
    # first guess (every other manage_* tool has a `list` action) works.
    if action == "list":
        return await list_sessions(_list_filter, session_id, owner=owner)
    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}
    # Allow "current" to refer to the active session
    if target_sid.lower() == "current" and session_id:
        target_sid = session_id
    def _session_query(db):
        # Look up the target row, restricted to the caller when an owner is known.
        query = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            query = query.filter(DbSession.owner == owner)
        return query
    # `switch` / `open` / `select` / `view` - the agent reaches for
    # these when the user asks to "open" or "switch to" a session.
    # There's no server-side way to make the browser navigate, so we
    # just return a clickable anchor link the user can click. The
    # frontend's chat-history click delegate routes `#session-<id>`
    # to selectSession(). The agent's reply naturally embeds this
    # result so the user sees a single clickable line.
    if action in ("switch", "open", "select", "view"):
        db = SessionLocal()
        try:
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            name = db_sess.name or target_sid
        finally:
            db.close()
        return {
            "action": action,
            "session_id": target_sid,
            "name": name,
            "results": f"[{name}](#session-{target_sid}) - click to open.",
        }
    db = SessionLocal()
    try:
        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.name = new_name
            db.commit()
            # Keep the in-memory session in step with the DB row.
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}
        elif action == "archive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = True
            db.commit()
            return {"action": "archive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' archived"}
        elif action == "unarchive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = False
            db.commit()
            return {"action": "unarchive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' unarchived"}
        elif action == "delete":
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if db_sess and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
            try:
                ok = _session_manager.delete_session(target_sid)
                if not ok:
                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
                return {"action": "delete", "session_id": target_sid,
                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}
        elif action in ("important", "unimportant"):
            is_important = action == "important"
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            # Prevent AI from unstarring sessions - only the user can do that manually
            if not is_important and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
            db_sess.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {status}"}
        elif action == "truncate":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            keep_count = 10
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    # Unparseable count: deliberately keep the default of 10.
                    pass
            success = _session_manager.truncate_messages(target_sid, keep_count)
            if success:
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}
        elif action == "fork":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            keep_count = 0  # 0 = all messages
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    # Unparseable count: deliberately copy every message.
                    pass
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}
            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )
            new_sess = _session_manager.get_session(new_sid)
            if not new_sess:
                # Fail with a clear message rather than an AttributeError below.
                return {"error": f"Failed to fork session '{target_sid}': new session {new_sid} could not be loaded"}
            # Carry the source's request headers over to the fork, the same way
            # create_session stores them on a new session; send_to_session reads
            # sess.headers, so without this a fork of a session on an
            # authenticated endpoint would lose its credentials.
            src_headers = getattr(source, "headers", None)
            if src_headers:
                new_sess.headers = dict(src_headers)
            # Copy messages (a positive keep_count keeps the first N of the history).
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            for msg in history:
                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                # Best-effort notification: never fail the fork over it.
                logger.debug("session_created event dispatch failed", exc_info=True)
            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
        else:
            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Handler classes registered in TOOL_HANDLERS
 # ---------------------------------------------------------------------------
 class CreateSessionTool:
    """Registry handler that exposes create_session as an agent tool."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Forward the payload to create_session with session id and owner read from ctx."""
        active_session = ctx.get("session_id")
        caller = ctx.get("owner")
        return await create_session(content, active_session, owner=caller)
 class ListSessionsTool:
    """Registry handler that exposes list_sessions as an agent tool."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Forward the payload to list_sessions with session id and owner read from ctx."""
        active_session = ctx.get("session_id")
        caller = ctx.get("owner")
        return await list_sessions(content, active_session, owner=caller)
 class SendToSessionTool:
    """Registry handler that exposes send_to_session as an agent tool."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Forward the payload to send_to_session with session id and owner read from ctx."""
        active_session = ctx.get("session_id")
        caller = ctx.get("owner")
        return await send_to_session(content, active_session, owner=caller)
 class ManageSessionTool:
    """Registry handler that exposes manage_session as an agent tool."""
    async def execute(self, content: str, ctx: dict) -> Dict:
        """Forward the payload to manage_session with session id and owner read from ctx."""
        active_session = ctx.get("session_id")
        caller = ctx.get("owner")
        return await manage_session(content, active_session, owner=caller)
@@ -1,8 +1,14 @@
 """
 ai_interaction.py
-AI-to-AI interaction tools: chat_with_model, create_session, list_sessions,
+AI-to-AI interaction tools: pipeline and manage_memory, plus shared model
-send_to_session, pipeline.
+resolution (_resolve_model), the session-manager singleton, and dispatch_ai_tool.
 As part of the tool -> registry migration (#3629), chat_with_model, ask_teacher
 and list_models moved to src/agent_tools/model_interaction_tools.py, and
 create_session, list_sessions, send_to_session and manage_session moved to
 src/agent_tools/session_tools.py. Those modules reuse get_session_manager /
 _resolve_model / AI_CHAT_TIMEOUT from here.
 These are agent tools — the LLM writes fenced code blocks and they execute
 through the standard agent_tools.py pipeline.
@@ -159,440 +165,6 @@ def _resolve_model(spec: str, owner: Optional[str] = None) -> Tuple[str, str, Di
 # Tool implementations
 # ---------------------------------------------------------------------------
 async def do_chat_with_model(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send a single user message to a named model and return its reply.
    Content format:
      Line 1: model_name (or model_name@endpoint_name)
      Line 2+: the message to send
    Returns {"model", "response"} on success (response cut to 10000
    characters) or {"error": ...} on bad input or a failed call.
    """
    from src.llm_core import llm_call_async
    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    message = remainder.strip()
    if not message:
        return {"error": "No message provided (line 2+ is the message)"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    try:
        reply = await llm_call_async(
            url, model,
            [{"role": "user", "content": message}],
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Cap very long replies before handing them back as tool output.
        shown = reply[:10000] + "\n... (truncated)" if len(reply) > 10000 else reply
        return {"model": model, "response": shown}
    except Exception as exc:
        logger.error(f"chat_with_model failed: {exc}")
        return {"error": f"Failed to get response from {model_spec}: {exc}"}
 _TEACHER_SYSTEM_PROMPT = (
    "You are a senior AI mentor. A less capable model is stuck on a problem and asking for help. "
    "Provide clear, actionable guidance:\n"
    "1. Brief analysis of the problem\n"
    "2. Recommended approach (step by step)\n"
    "3. Key things to watch out for\n\n"
    "Be concise and practical. No preamble."
 )
 async def do_ask_teacher(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Ask a more capable "teacher" model for guidance on a problem.
    Content format:
      Line 1: model_name (or 'auto')
      Line 2+: the problem description
    With 'auto' (or a blank first line) the model comes from the
    teacher_model setting. Returns {"model", "response", "teacher": True}
    on success (response cut to 8000 characters) or {"error": ...}.
    """
    from src.llm_core import llm_call_async
    from src.settings import get_setting
    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    problem = remainder.strip()
    if not problem:
        return {"error": "No problem description provided"}
    if model_spec.lower() in ("auto", ""):
        # No explicit model: fall back to the configured teacher.
        model_spec = get_setting("teacher_model", "")
        if not model_spec:
            return {"error": "No teacher model configured. Specify a model name or set teacher_model in settings."}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    prompt = [
        {"role": "system", "content": _TEACHER_SYSTEM_PROMPT},
        {"role": "user", "content": f"Problem:\n{problem}"},
    ]
    try:
        answer = await llm_call_async(
            url, model, prompt,
            headers=headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        shown = answer[:8000] + "\n... (truncated)" if len(answer) > 8000 else answer
        return {"model": model, "response": shown, "teacher": True}
    except Exception as exc:
        logger.error(f"ask_teacher failed: {exc}")
        return {"error": f"Teacher call failed ({model_spec}): {exc}"}
 async def do_second_opinion(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Have another model review the current conversation, then let the
    session's own model weigh that review and produce a unified answer.
    Content format:
      Line 1: model_name (or model_name@endpoint_name)
      Line 2+ (optional): specific question or focus area
    Flow:
      1. Collect the last 15 context messages of the current session
      2. Ask the reviewer model for honest feedback
      3. Hand the feedback to the session's own model to evaluate and unify
      4. Return the review and the unified response as one combined text
    Returns {"model", "response", "instruction"} or {"error": ...}.
    """
    from src.llm_core import llm_call_async
    first_line, _, remainder = content.strip().partition("\n")
    model_spec = first_line.strip()
    if not model_spec:
        return {"error": "First line must be the model name"}
    focus = remainder.strip()
    try:
        reviewer_url, reviewer_model, reviewer_headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    # Gather recent conversation context from the current session.
    sess = _session_manager.get_session(session_id) if session_id and _session_manager else None
    snippets = []
    if sess:
        history = sess.get_context_messages()
        recent = history[-15:] if len(history) > 15 else history
        for entry in recent:
            role = entry.get("role", "unknown").upper()
            text = entry.get("content", "")
            if isinstance(text, list):
                # Multi-part content: keep only the text of dict parts.
                text = " ".join(
                    part.get("text", "") for part in text if isinstance(part, dict)
                )
            if text:
                snippets.append(f"[{role}]: {text[:2000]}")
    context_text = "\n\n".join(snippets)
    if not context_text:
        return {"error": "No conversation context found to review"}
    # ── Step 1: reviewer feedback ──
    reviewer_system = (
        "You are giving a second opinion on a conversation between a user and an AI assistant. "
        "Your job is to be genuinely helpful and honest — not a yes-man, but not a contrarian either.\n\n"
        "Guidelines:\n"
        "- If the plan/idea is solid, say so clearly. Don't manufacture problems that aren't there.\n"
        "- If you spot a real flaw, blind spot, or simpler approach — call it out directly.\n"
        "- Be practical. Don't over-engineer or over-analyze. Real-world tradeoffs matter.\n"
        "- If there's a meaningfully better way to do something, suggest it concretely.\n"
        "- Give credit where it's due — highlight what's working well.\n"
        "- Keep it concise and actionable. No fluff.\n"
        "- You're a second pair of eyes, not a professor grading a paper."
    )
    if focus:
        closing = f"\n\n---\nSpecifically, I want your take on: {focus}"
    else:
        closing = "\n\n---\nGive me your honest second opinion on what's being discussed."
    reviewer_message = f"Here's the conversation so far:\n\n{context_text}" + closing
    try:
        review = await llm_call_async(
            reviewer_url, reviewer_model,
            [
                {"role": "system", "content": reviewer_system},
                {"role": "user", "content": reviewer_message},
            ],
            headers=reviewer_headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        if len(review) > 8000:
            review = review[:8000] + "\n... (truncated)"
    except Exception as exc:
        logger.error(f"second_opinion reviewer call failed: {exc}")
        return {"error": f"Failed to get second opinion from {model_spec}: {exc}"}
    # ── Step 2: the session's own model evaluates the review ──
    unified = ""
    original_model = "unknown"
    if sess:
        original_url = sess.endpoint_url
        original_model = sess.model
        original_headers = getattr(sess, "headers", None) or {}
        unify_system = (
            "Another AI model just reviewed the conversation you've been having with the user. "
            "Read their feedback carefully, then respond with:\n\n"
            "1. **What you agree with** — acknowledge valid points honestly.\n"
            "2. **What you disagree with** — explain why, briefly.\n"
            "3. **Unified version** — produce an updated/refined version of whatever was being discussed, "
            "incorporating the feedback you found valid. Don't accept every note blindly — "
            "use your judgment on what actually improves things vs what's unnecessary.\n\n"
            "Be concise and practical. The user wants a better result, not a meta-discussion."
        )
        unify_message = (
            f"Here's the conversation context:\n\n{context_text}\n\n"
            f"---\n\n"
            f"**Review from {reviewer_model}:**\n\n{review}\n\n"
            f"---\n\n"
            f"Evaluate this feedback and produce a unified improved version."
        )
        try:
            unified = await llm_call_async(
                original_url, original_model,
                [
                    {"role": "system", "content": unify_system},
                    {"role": "user", "content": unify_message},
                ],
                headers=original_headers,
                timeout=AI_CHAT_TIMEOUT,
            )
            if len(unified) > 10000:
                unified = unified[:10000] + "\n... (truncated)"
        except Exception as exc:
            # The review itself succeeded, so report the failure inline.
            logger.error(f"second_opinion unify call failed: {exc}")
            unified = f"(Failed to get unified response: {exc})"
    # Stitch the review and the unified reply into one markdown document.
    combined = (
        f"## Second Opinion from {reviewer_model}\n\n{review}"
        f"\n\n---\n\n"
        f"## {original_model}'s Response\n\n{unified}"
    )
    return {
        "model": reviewer_model,
        "response": combined,
        "instruction": "Present these results to the user exactly as they are. Do NOT call second_opinion again. The user can continue the conversation from here.",
    }
 async def do_create_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Create a chat session from a two-line spec.
    Content format:
      Line 1: session name
      Line 2: model_name (or model_name@endpoint_name)
    Returns a dict with session_id, name, model and endpoint_url on success,
    or a dict holding a single "error" key when validation or creation fails.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    spec_lines = content.strip().split("\n")
    if len(spec_lines) < 2:
        return {"error": "Need 2 lines: session name, then model spec"}
    name, model_spec = spec_lines[0].strip(), spec_lines[1].strip()
    if not name:
        return {"error": "Session name cannot be empty"}
    try:
        url, model, headers = _resolve_model(model_spec, owner=owner)
    except ValueError as exc:
        return {"error": str(exc)}
    # Short id: the first 8 characters of a random UUID4.
    sid = str(uuid.uuid4())[:8]
    try:
        _session_manager.create_session(
            session_id=sid, name=name, endpoint_url=url, model=model, rag=False, owner=owner,
        )
        # Keep the resolved headers on the session so later calls can reuse them.
        created = _session_manager.get_session(sid)
        if created and headers:
            created.headers = headers
        try:
            from src.event_bus import fire_event
            fire_event("session_created", owner)
        except Exception:
            # Best-effort notification: a failed event must not fail the creation.
            logger.debug("session_created event dispatch failed", exc_info=True)
    except Exception as exc:
        logger.error(f"create_session failed: {exc}")
        return {"error": f"Failed to create session: {exc}"}
    return {"session_id": sid, "name": name, "model": model, "endpoint_url": url}
 async def do_list_sessions(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List the caller's sessions sorted by most-recently-active first.

    Output includes a relative "last active" timestamp per row so the
    agent can answer "open my last chat" without guessing from titles.
    The most-recent session is always first in the list.

    Args:
        content: Optional filter keyword (case-insensitive substring match
            on the session name). Blank means "no filter".
        session_id: Accepted for signature parity; not read here.
        owner: Used to scope the listing to the caller's own sessions.

    Returns:
        ``{"results": <markdown text>}`` on success (at most 50 rows are
        rendered), or ``{"error": <message>}`` if the session manager is
        missing or anything raises while building the list.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    keyword = content.strip().lower() if content.strip() else None
    try:
        from core.database import SessionLocal, Session as DbSession
        from datetime import datetime, timezone
        # Pull every session's last_accessed from the DB so we can sort
        # by recency. In-memory sessions hold name + model + msg_count;
        # the DB row holds the timestamps.
        db = SessionLocal()
        try:
            db_rows = {r.id: r for r in db.query(DbSession).all()}
        finally:
            db.close()
        # SECURITY: scope to the caller's sessions. Passing None returned
        # every user's sessions, which the agent tool then exposed via the
        # "list my chats" reply.
        sessions = _session_manager.get_sessions_for_user(owner)

        def _as_utc(ts):
            """Return ``ts`` as a tz-aware UTC datetime, or None if unusable.

            Naive values are treated as UTC. Normalising everything to aware
            UTC lets naive and aware timestamps be compared without raising
            TypeError.
            """
            if not ts:
                return None
            try:
                if ts.tzinfo is None:
                    return ts.replace(tzinfo=timezone.utc)
                return ts.astimezone(timezone.utc)
            except Exception:
                return None

        # Sentinel for rows without a usable timestamp. It must be tz-aware,
        # otherwise comparing it with an aware timestamp raises TypeError.
        oldest = datetime.min.replace(tzinfo=timezone.utc)

        rows = []
        for sid, sess in sessions.items():
            if keyword and keyword not in (sess.name or "").lower():
                continue
            db_row = db_rows.get(sid)
            # Prefer last_accessed; fall back to updated_at, then created_at.
            ts = None
            if db_row:
                ts = getattr(db_row, 'last_accessed', None) or getattr(db_row, 'updated_at', None) or getattr(db_row, 'created_at', None)
            rows.append((ts, sid, sess))
        # Sort by timestamp DESC; rows without a timestamp sink to the bottom.
        rows.sort(key=lambda r: _as_utc(r[0]) or oldest, reverse=True)

        def _rel(ts):
            """Render ``ts`` as a short relative age such as '5m ago'."""
            if not ts:
                return 'never'
            aware = _as_utc(ts)
            if aware is None:
                return 'unknown'
            diff = (datetime.now(timezone.utc) - aware).total_seconds()
            if diff < 60: return 'just now'
            if diff < 3600: return f'{int(diff / 60)}m ago'
            if diff < 86400: return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        lines = []
        for i, (ts, sid, sess) in enumerate(rows):
            if i >= 50:
                lines.append(f"... and {len(rows) - 50} more (showing first 50)")
                break
            # Escape brackets so the name cannot break the markdown link.
            safe_name = (sess.name or "Untitled").replace("[", "\\[").replace("]", "\\]")
            msg_count = getattr(sess, "message_count", 0) or 0
            model = getattr(sess, "model", "unknown")
            marker = " ← most recent" if i == 0 else ""
            lines.append(f"- **[{safe_name}](#session-{sid})** (id: `{sid}`, model: {model}, {msg_count} msgs, last active {_rel(ts)}){marker}")
        if not lines:
            return {"results": "No sessions found" + (f" matching '{keyword}'" if keyword else "") + "."}
        return {
            "results": (
                f"Found {len(rows)} session(s), sorted most-recent first:\n"
                + "\n".join(lines)
                + "\n\nAssistant: when replying to the user, preserve the chat-title markdown links exactly as shown, e.g. `[Chat](#session-id)`. Do not rewrite this as a plain, non-clickable table."
            )
        }
    except Exception as e:
        logger.error(f"list_sessions failed: {e}")
        return {"error": str(e)}
 async def do_send_to_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Send one message to an existing session and return the model's reply.

    Content format:
      Line 1: session_id
      Line 2+: message

    Args:
        content: Target session id on the first line, message text after it.
        session_id: Accepted for signature parity; not read here.
        owner: When set, a session whose ``owner`` is set to someone else is
            reported as not found.

    Returns:
        ``{"session_id", "session_name", "response"}`` on success (the
        response is cut to 10000 characters for tool output), or
        ``{"error": <message>}`` otherwise.
    """
    from src.llm_core import llm_call_async
    from core.models import ChatMessage
    if not _session_manager:
        return {"error": "Session manager not available"}

    first_line, newline, remainder = content.strip().partition("\n")
    if not newline:
        return {"error": "Need 2 lines: session_id, then message"}
    target_sid = first_line.strip()
    message = remainder.strip()

    target = _session_manager.get_session(target_sid)
    if not target:
        return {"error": f"Session '{target_sid}' not found"}
    # Owner-scope: another user's session gets the same "not found" answer,
    # so its existence is not revealed.
    if owner and getattr(target, "owner", None) and target.owner != owner:
        return {"error": f"Session '{target_sid}' not found"}
    if not message:
        return {"error": "No message provided"}

    try:
        # Prior history plus the new user turn form the prompt.
        prompt_messages = target.get_context_messages()
        prompt_messages.append({"role": "user", "content": message})
        reply = await llm_call_async(
            target.endpoint_url, target.model, prompt_messages,
            headers=target.headers,
            timeout=AI_CHAT_TIMEOUT,
        )
        # Both turns are stored only after the call has succeeded.
        target.add_message(ChatMessage("user", message))
        target.add_message(ChatMessage("assistant", reply))
        # The stored message stays complete; only the tool output is cut.
        if len(reply) > 10000:
            reply = reply[:10000] + "\n... (truncated)"
        return {
            "session_id": target_sid,
            "session_name": target.name,
            "response": reply,
        }
    except Exception as exc:
        logger.error(f"send_to_session failed: {exc}")
        return {"error": f"Failed to send to session: {exc}"}
 async def stream_ai_tool(tool: str, content: str, session_id: Optional[str] = None, owner: Optional[str] = None):
@@ -715,229 +287,6 @@ async def do_pipeline(content: str, session_id: Optional[str] = None, owner: Opt
 # Session management tool
 # ---------------------------------------------------------------------------
 async def do_manage_session(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """Manage sessions: list, switch, rename, archive, delete, important, truncate, fork.

    Content is accepted in two formats.

    JSON object (used when the stripped content starts with ``{`` and parses
    to a dict):
      ``action``      — the action name
      ``session_id``  — target id (``session`` / ``id`` are accepted aliases)
      ``value``       — action parameter (``name`` / ``new_name`` / ``title`` /
                        ``keep_count`` are accepted aliases)
      ``filter``      — keyword for the ``list`` action

    Legacy line format:
      Line 1: action (rename|archive|unarchive|delete|important|unimportant|truncate|fork|list|switch)
      Line 2: target session_id (or "current" to use the active session)
      Line 3: action-specific param (e.g. new name for rename, keep_count for truncate)

    Args:
        content: The request in one of the two formats above.
        session_id: Id of the active chat; substituted for "current" and
            used to refuse deleting the chat being used.
        owner: When not None, DB lookups are restricted to rows with this owner.

    Returns:
        A dict describing the outcome (always with a ``results`` string on
        success) or ``{"error": <message>}``.
    """
    if not _session_manager:
        return {"error": "Session manager not available"}
    from src.database import SessionLocal, Session as DbSession
    # Accept BOTH the structured JSON args the tool schema advertises
    # ({action, session_id, value}) AND the legacy line-based format
    # (line1=action, line2=session_id, line3=value). Native function-calling
    # models send JSON; fenced-block callers send lines. Previously only the
    # line format was parsed, so a model that followed the schema (JSON) got
    # "Need at least 2 lines" / "Rename needs line 3" and couldn't drive it.
    _raw = (content or "").strip()
    action = ""
    target_sid = ""
    value = None      # the action param: new name (rename) / keep_count (truncate, fork)
    _list_filter = ""
    _parsed = None
    if _raw.startswith("{"):
        # Content that looks like JSON but fails to parse falls through to
        # the line-based parser below.
        try:
            _parsed = json.loads(_raw)
        except Exception:
            _parsed = None
    if isinstance(_parsed, dict):
        action = str(_parsed.get("action") or "").strip().lower()
        target_sid = str(_parsed.get("session_id") or _parsed.get("session") or _parsed.get("id") or "").strip()
        _v = _parsed.get("value")
        if _v is None:
            # No explicit `value`: fall back to the first truthy alias.
            _v = (_parsed.get("name") or _parsed.get("new_name")
                  or _parsed.get("title") or _parsed.get("keep_count"))
        value = None if _v is None else str(_v).strip()
        _list_filter = str(_parsed.get("filter") or "").strip()
    else:
        lines = _raw.split("\n")
        if not lines or not lines[0].strip():
            return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
        action = lines[0].strip().lower()
        target_sid = lines[1].strip() if len(lines) >= 2 else ""
        value = lines[2].strip() if len(lines) >= 3 else None
        # For `list`, everything after the action line is the filter keyword.
        _list_filter = "\n".join(lines[1:]).strip()
    if not action:
        return {"error": "Missing action (rename|archive|delete|important|truncate|fork|list|switch)"}
    # `list` alias — dispatch to do_list_sessions so the agent's natural
    # first guess (every other manage_* tool has a `list` action) works.
    if action == "list":
        return await do_list_sessions(_list_filter, session_id, owner=owner)
    if not target_sid:
        return {"error": "Need a session_id (or 'current' for the active chat)"}
    # Allow "current" to refer to the active session
    # (when session_id is falsy the literal "current" is kept and the
    # lookups below will report it as not found).
    if target_sid.lower() == "current" and session_id:
        target_sid = session_id
    # `switch` / `open` / `select` / `view` — the agent reaches for
    # these when the user asks to "open" or "switch to" a session.
    # There's no server-side way to make the browser navigate, so we
    # just return a clickable anchor link the user can click. The
    # frontend's chat-history click delegate routes `#session-<id>`
    # to selectSession(). The agent's reply naturally embeds this
    # result so the user sees a single clickable line.
    def _session_query(db):
        # Shared lookup for every action: match on id and, when an owner is
        # given, on owner too, so another user's session reads as "not found".
        query = db.query(DbSession).filter(DbSession.id == target_sid)
        if owner is not None:
            query = query.filter(DbSession.owner == owner)
        return query
    if action in ("switch", "open", "select", "view"):
        db = SessionLocal()
        try:
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            name = db_sess.name or target_sid
        finally:
            db.close()
        return {
            "action": action,
            "session_id": target_sid,
            "name": name,
            "results": f"[{name}](#session-{target_sid}) — click to open.",
        }
    # All remaining actions share one DB session, closed in the `finally` below.
    db = SessionLocal()
    try:
        if action == "rename":
            if not value:
                return {"error": "rename needs a new name (the `value` arg, or line 3 in the legacy format)"}
            new_name = value
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.name = new_name
            db.commit()
            # Also update the session manager's copy of the name.
            _session_manager.update_session_name(target_sid, new_name)
            return {"action": "rename", "session_id": target_sid, "name": new_name,
                    "results": f"Session renamed to '{new_name}'"}
        elif action == "archive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = True
            db.commit()
            return {"action": "archive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' archived"}
        elif action == "unarchive":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            db_sess.archived = False
            db.commit()
            return {"action": "unarchive", "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' unarchived"}
        elif action == "delete":
            # Guard rails: never delete the active chat, an unknown id, or a
            # starred session.
            if target_sid == session_id:
                return {"error": "Cannot delete the current session while chatting in it. Delete other sessions first."}
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Refusing to delete an unknown chat id; use the exact id from list_sessions."}
            if db_sess and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred/favorited. Unstar it first before deleting."}
            try:
                # The deletion itself is delegated to the session manager.
                ok = _session_manager.delete_session(target_sid)
                if not ok:
                    return {"error": f"Session '{target_sid}' was not deleted because it no longer exists."}
                return {"action": "delete", "session_id": target_sid,
                        "results": f"Session '{db_sess.name or target_sid}' deleted"}
            except Exception as e:
                return {"error": f"Failed to delete session: {e}"}
        elif action in ("important", "unimportant"):
            is_important = action == "important"
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            # Prevent AI from unstarring sessions — only the user can do that manually
            if not is_important and db_sess.is_important:
                return {"error": f"Session '{db_sess.name}' is starred by the user. Only the user can unstar sessions manually."}
            db_sess.is_important = is_important
            db.commit()
            status = "marked as important" if is_important else "unmarked as important"
            return {"action": action, "session_id": target_sid,
                    "results": f"Session '{db_sess.name}' {status}"}
        elif action == "truncate":
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            # Default is 10; a non-numeric value is ignored and the default kept.
            keep_count = 10
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    pass
            success = _session_manager.truncate_messages(target_sid, keep_count)
            if success:
                return {"action": "truncate", "session_id": target_sid,
                        "results": f"Session truncated to last {keep_count} messages"}
            return {"error": f"Failed to truncate session '{target_sid}'"}
        elif action == "fork":
            # The DB lookup enforces owner scoping; the copy itself is made
            # from the session manager's copy of the session.
            db_sess = _session_query(db).first()
            if not db_sess:
                return {"error": f"Session '{target_sid}' not found. Use list_sessions and pass the exact id it returned."}
            keep_count = 0  # 0 = all messages
            if value:
                try:
                    keep_count = int(value)
                except ValueError:
                    pass
            source = _session_manager.get_session(target_sid)
            if not source:
                return {"error": f"Session '{target_sid}' not found"}
            new_sid = str(uuid.uuid4())[:8]
            _session_manager.create_session(
                session_id=new_sid,
                name=f"Fork: {source.name}",
                endpoint_url=source.endpoint_url,
                model=source.model,
                rag=False,
                owner=owner,
            )
            # Copy messages
            # (a positive keep_count keeps the FIRST keep_count entries of the
            # history, i.e. the fork is cut at that point of the conversation).
            history = source.get_context_messages()
            if keep_count > 0:
                history = history[:keep_count]
            from core.models import ChatMessage as InMemoryMsg
            new_sess = _session_manager.get_session(new_sid)
            for msg in history:
                new_sess.add_message(InMemoryMsg(msg["role"], msg["content"]))
            # Best-effort notification; a failure is only logged at debug level.
            try:
                from src.event_bus import fire_event
                fire_event("session_created", owner)
            except Exception:
                logger.debug("session_created event dispatch failed", exc_info=True)
            return {"action": "fork", "session_id": new_sid,
                    "source_session": target_sid, "messages_copied": len(history),
                    "results": f"Forked session '{source.name}' -> new session {new_sid} ({len(history)} messages)"}
        else:
            return {"error": f"Unknown action '{action}'. Use: list, switch, rename, archive, unarchive, delete, important, unimportant, truncate, fork"}
    except Exception as e:
        logger.error(f"manage_session failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
 # Memory management tool
 # ---------------------------------------------------------------------------
@@ -1104,83 +453,6 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
        return {"error": f"Unknown action '{action}'. Use: list, add, edit, delete, search"}
 # ---------------------------------------------------------------------------
 # List models tool
 # ---------------------------------------------------------------------------
 async def do_list_models(content: str, session_id: Optional[str] = None, owner: Optional[str] = None) -> Dict:
    """List all available models across configured endpoints.

    Args:
        content: Optional filter keyword. A model is kept when the keyword
            occurs (case-insensitively) in its id or in the endpoint name.
        session_id: Accepted for signature parity; not read here.
        owner: When truthy, endpoints are narrowed via ``owner_filter`` and
            the value is forwarded to ``resolve_endpoint_runtime``.

    Returns:
        ``{"results": <markdown text>}`` grouping model ids per endpoint, or
        ``{"error": <message>}`` if anything unexpected raises.
    """
    import httpx
    from src.database import SessionLocal, ModelEndpoint
    from src.llm_core import _detect_provider, ANTHROPIC_MODELS
    from src.auth_helpers import owner_filter
    keyword = content.strip().lower() if content.strip() else None
    db = SessionLocal()
    try:
        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            query = owner_filter(query, ModelEndpoint, owner)
        endpoints = query.all()
        if not endpoints:
            return {"results": "No enabled model endpoints configured."}
        result_lines = []
        total_models = 0
        # This is a coroutine: a synchronous httpx.get() here would block the
        # event loop for up to the full timeout per endpoint. Use the async
        # client so other tasks keep running while a slow endpoint answers.
        async with httpx.AsyncClient(timeout=5) as client:
            for ep in endpoints:
                try:
                    base, api_key = resolve_endpoint_runtime(ep, owner=owner)
                except Exception:
                    # Endpoints that cannot be resolved are left out of the listing.
                    continue
                provider = _detect_provider(base)
                headers = build_headers(api_key, base)
                model_ids = []
                if provider == "anthropic":
                    # Anthropic models come from the static list rather than a
                    # network call.
                    model_ids = list(ANTHROPIC_MODELS)
                else:
                    try:
                        models_url = build_models_url(base)
                        if models_url:
                            r = await client.get(models_url, headers=headers)
                            r.raise_for_status()
                            data = r.json()
                            # Read ids from `data`, else names from `models`.
                            model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
                            if not model_ids:
                                model_ids = [
                                    m.get("name") or m.get("model")
                                    for m in (data.get("models") or [])
                                    if m.get("name") or m.get("model")
                                ]
                        else:
                            # No models URL for this provider: use the cached list.
                            model_ids = json.loads(ep.cached_models or "[]")
                    except Exception:
                        model_ids = ["(endpoint offline)"]
                if keyword:
                    model_ids = [m for m in model_ids if keyword in m.lower() or keyword in (ep.name or "").lower()]
                if model_ids:
                    result_lines.append(f"\n**{ep.name or base}** ({provider}):")
                    for mid in model_ids:
                        result_lines.append(f"  - `{mid}`")
                        total_models += 1
        if not result_lines:
            return {"results": "No models found" + (f" matching '{keyword}'" if keyword else "") + "."}
        header = f"Available models ({total_models} total):"
        return {"results": header + "\n".join(result_lines)}
    except Exception as e:
        logger.error(f"list_models failed: {e}")
        return {"error": str(e)}
    finally:
        db.close()
 # ---------------------------------------------------------------------------
@@ -1831,55 +1103,20 @@ async def dispatch_ai_tool(
 ) -> Tuple[str, Dict]:
    """Dispatch an AI interaction tool. Returns (description, result_dict)."""
-    if tool == "chat_with_model":
+    if tool == "pipeline":
        model_spec = content.split("\n")[0].strip()[:60]
        desc = f"chat_with_model: {model_spec}"
        result = await do_chat_with_model(content, session_id, owner=owner)
    elif tool == "create_session":
        name = content.split("\n")[0].strip()[:60]
        desc = f"create_session: {name}"
        result = await do_create_session(content, session_id, owner=owner)
    elif tool == "list_sessions":
        keyword = content.strip()[:40]
        desc = f"list_sessions{': ' + keyword if keyword else ''}"
        result = await do_list_sessions(content, session_id, owner=owner)
    elif tool == "send_to_session":
        sid = content.split("\n")[0].strip()[:20]
        desc = f"send_to_session: {sid}"
        result = await do_send_to_session(content, session_id, owner=owner)
    elif tool == "pipeline":
        desc = "pipeline: running steps"
        result = await do_pipeline(content, session_id, owner=owner)
    elif tool == "manage_session":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_session: {action}"
        result = await do_manage_session(content, session_id, owner=owner)
    elif tool == "manage_memory":
        action = content.split("\n")[0].strip()[:40]
        desc = f"manage_memory: {action}"
        result = await do_manage_memory(content, session_id, owner=owner)
    elif tool == "list_models":
        keyword = content.strip()[:40]
        desc = f"list_models{': ' + keyword if keyword else ''}"
        result = await do_list_models(content, session_id, owner=owner)
    elif tool == "ui_control":
        action = content.split("\n")[0].strip()[:60]
        desc = f"ui_control: {action}"
        result = await do_ui_control(content, session_id, owner=owner)
    elif tool == "ask_teacher":
        problem = content.split("\n", 1)[-1].strip()[:60]
        desc = f"ask_teacher: {problem}"
        result = await do_ask_teacher(content, session_id, owner=owner)
    else:
        desc = f"unknown ai tool: {tool}"
        result = {"error": f"Unknown AI interaction tool: {tool}"}
@@ -263,10 +263,32 @@ def list_for_session(session_id: str) -> List[Dict[str, Any]]:
    return [r for r in refresh().values() if r.get("session_id") == session_id]
 def kill(job_id: str) -> Optional[Dict[str, Any]]:
    """Stop a running job and record it as killed.

    The job's pid is handed to ``_kill`` and the record is rewritten as
    failed (exit code -1) with ``killed`` set. ``followed_up`` is set as
    well, so the monitor does not also fire an auto-continue for a job the
    agent deliberately stopped.

    Returns:
        The job record, or None if ``job_id`` is unknown. A job that is not
        in the "running" state is returned unchanged and nothing is saved,
        so repeated calls are harmless.
    """
    registry = _load()
    job = registry.get(job_id)
    # Unknown id -> None; a job that is not running -> returned as-is.
    if job is None or job.get("status") != "running":
        return job
    _kill(job.get("pid"))
    job.update({
        "status": "failed",
        "exit_code": -1,
        "ended_at": time.time(),
        "killed": True,
        "followed_up": True,
    })
    _save(registry)
    return job
 def result_text(rec: Dict[str, Any]) -> str:
    """Human/agent-readable summary of a finished job, for the follow-up."""
    out = _read_output(rec)
-    if rec.get("timed_out"):
+    if rec.get("killed"):
        head = "Background job was killed."
    elif rec.get("timed_out"):
        head = f"Background job timed out after {rec.get('max_runtime_s')}s."
    elif rec.get("died"):
        head = "Background job process died unexpectedly (no exit code)."
@@ -102,6 +102,9 @@ SEARXNG_INSTANCE = os.getenv("SEARXNG_INSTANCE", "http://localhost:8080")
 CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
 CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
 # Auth policy
 PASSWORD_MIN_LENGTH = 8
 # Default parameters
 DEFAULT_TEMPERATURE = 1.0
 DEFAULT_MAX_TOKENS = 0
@@ -161,6 +161,32 @@ def normalize_base(url: str) -> str:
    return url
 def _validated_endpoint_base(url: str) -> str:
    """Return a base URL that is safe for endpoint path appends."""
    base = (url or "").strip().rstrip("/")
    if "?" in base or "#" in base:
        raise ValueError("Endpoint base URL must not include query or fragment")
    return urlunparse(urlparse(base)._replace(query="", fragment="")).rstrip("/")
 def _prepare_endpoint_base(base: str) -> str:
    """Normalise and validate ``base`` both before and after URL resolution.

    The input is normalised and validated, passed through ``resolve_url``,
    and the resolved result is normalised and validated again.
    """
    checked_input = _validated_endpoint_base(normalize_base(base))
    resolved = resolve_url(checked_input)
    return _validated_endpoint_base(normalize_base(resolved))
 def _append_endpoint_path(base: str, suffix: str) -> str:
    parsed = urlparse(base)
    current = (parsed.path or "").rstrip("/")
    extra = "/" + suffix.lstrip("/")
    path = f"{current}{extra}" if current else extra
    return urlunparse(parsed._replace(path=path, query="", fragment=""))
 def _pathless_host(base: str, host: str) -> bool:
    parsed = urlparse(base)
    return (parsed.hostname or "").lower() == host and not (parsed.path or "").strip("/")
 def _anthropic_api_root(base: str) -> str:
    """Return Anthropic's API root, preserving /v1 for OpenAI-compatible APIs elsewhere."""
    base = (base or "").strip().rstrip("/")
@@ -171,15 +197,17 @@ def _anthropic_api_root(base: str) -> str:
 def build_chat_url(base: str) -> str:
    """Return the correct chat endpoint URL for a given base."""
-    base = resolve_url(base)
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/messages"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/messages")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/chat"
+        return _append_endpoint_path(_ollama_api_root(base), "/chat")
    if provider == "chatgpt-subscription":
-        return base.rstrip("/") + "/responses"
+        return _append_endpoint_path(base, "/responses")
-    return base + "/chat/completions"
+    if _pathless_host(base, "api.openai.com"):
        base = _append_endpoint_path(base, "/v1")
    return _append_endpoint_path(base, "/chat/completions")
 def build_models_url(base: str) -> Optional[str]:
@@ -193,12 +221,12 @@ def build_models_url(base: str) -> Optional[str]:
    untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
    their semantics).
    """
-    base = normalize_base(resolve_url(base))
+    base = _prepare_endpoint_base(base)
    provider = _detect_provider(base)
    if provider == "anthropic":
-        return _anthropic_api_root(base) + "/v1/models"
+        return _append_endpoint_path(_anthropic_api_root(base), "/v1/models")
    if provider == "ollama":
-        return _ollama_api_root(base) + "/tags"
+        return _append_endpoint_path(_ollama_api_root(base), "/tags")
    if provider == "chatgpt-subscription":
        return None
    # Generic OpenAI-compatible fallback: local model servers with no explicit
@@ -208,10 +236,10 @@ def build_models_url(base: str) -> Optional[str]:
    parsed = urlparse(base)
    host = (parsed.hostname or "").lower()
    is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
-    uses_v1_models_by_default = is_local or host in {"api.deepseek.com"}
+    uses_v1_models_by_default = is_local or host in {"api.deepseek.com", "api.openai.com"}
    if not parsed.path and uses_v1_models_by_default:
-        base = base + "/v1"
+        base = _append_endpoint_path(base, "/v1")
-    return base + "/models"
+    return _append_endpoint_path(base, "/models")
 def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
@@ -4,6 +4,7 @@ import uuid
 import logging
 import re
 from typing import Dict, List, Optional, Any
 from urllib.parse import urljoin, urlparse, urlunparse
 import httpx
 from fastapi import HTTPException
@@ -202,6 +203,22 @@ def mask_integration_secret(integration: Dict[str, Any]) -> Dict[str, Any]:
    return safe
 def _normalize_integration_base_url(base_url: Any) -> str:
    if not isinstance(base_url, str) or not base_url.strip():
        raise ValueError("Integration base URL is required")
    cleaned = base_url.strip().rstrip("/")
    if "?" in cleaned or "#" in cleaned:
        raise ValueError("Integration base URL must not include query or fragment")
    parsed = urlparse(cleaned)
    if parsed.scheme.lower() not in ("http", "https") or not parsed.hostname:
        raise ValueError("Integration base URL must be an HTTP(S) URL")
    return urlunparse(parsed._replace(scheme=parsed.scheme.lower(), query="", fragment="")).rstrip("/")
 def _join_integration_url(base_url: str, path: str) -> str:
    return urljoin(base_url.rstrip("/") + "/", path.lstrip("/"))
 def load_integrations() -> List[Dict[str, Any]]:
    """Load all integrations from disk with secrets decrypted for runtime use."""
    if not os.path.exists(DATA_FILE):
@@ -261,8 +278,10 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
    if not isinstance(integration.get("name"), str) or not integration["name"].strip():
        raise HTTPException(400, "Integration name is required")
-    if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
+    try:
-        raise HTTPException(400, "Integration base URL is required")
+        integration["base_url"] = _normalize_integration_base_url(integration.get("base_url"))
    except ValueError as exc:
        raise HTTPException(400, str(exc)) from exc
    integrations = load_integrations()
    integrations.append(integration)
@@ -272,10 +291,14 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
 def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Update fields on an existing integration. Returns updated integration or None."""
    data = dict(data)
    if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
        raise HTTPException(400, "Integration name is required")
-    if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
+    if "base_url" in data:
-        raise HTTPException(400, "Integration base URL is required")
+        try:
            data["base_url"] = _normalize_integration_base_url(data["base_url"])
        except ValueError as exc:
            raise HTTPException(400, str(exc)) from exc
    integrations = load_integrations()
    for item in integrations:
@@ -341,9 +364,10 @@ async def execute_api_call(
    if not integration.get("enabled", True):
        return {"error": f"Integration '{integration.get('name')}' is disabled", "exit_code": 1}
-    base_url = integration.get("base_url", "").rstrip("/")
+    try:
-    if not base_url:
+        base_url = _normalize_integration_base_url(integration.get("base_url", ""))
-        return {"error": "Integration has no base_url configured", "exit_code": 1}
+    except ValueError as exc:
        return {"error": str(exc), "exit_code": 1}
    # Strip common API path suffixes users might accidentally include
    # (e.g. "http://host/v1/" → "http://host"). The integration's preset
@@ -366,7 +390,10 @@ async def execute_api_call(
    if re.search(r"^https?://", path) or "://" in path:
        return {"error": "Path must not contain a protocol scheme", "exit_code": 1}
-    url = base_url + path
+    if "#" in path:
        return {"error": "Path must not contain a fragment", "exit_code": 1}
    url = _join_integration_url(base_url, path)
    method = method.upper()
    # Build headers
@@ -87,7 +87,7 @@ _host_health_lock = threading.Lock()
 _model_activity: Dict[str, float] = {}
 _HARMONY_MARKER_RE = re.compile(
-    r"<\|channel\|>(analysis|final)"
+    r"<\|channel\|>(analysis|commentary|final)"
    r"|<\|start\|>(?:assistant|system|user|tool)?"
    r"|<\|message\|>"
    r"|<\|end\|>"
@@ -96,6 +96,7 @@ _HARMONY_MARKER_RE = re.compile(
 )
 _HARMONY_MARKERS = (
    "<|channel|>analysis",
    "<|channel|>commentary",
    "<|channel|>final",
    "<|start|>assistant",
    "<|start|>system",
@@ -145,7 +146,10 @@ class _HarmonyStreamRouter:
            out.append((text, False))
            return
        if self._in_message:
-            out.append((text, self._channel == "analysis"))
+            # analysis + commentary (tool-call preambles / function-arg bodies)
            # are internal, not user-facing — route them to thinking so they
            # don't leak into the visible answer; only `final` is visible.
            out.append((text, self._channel in ("analysis", "commentary")))
    def _handle_marker(self, match: re.Match[str]) -> None:
        marker = match.group(0)
@@ -17,10 +17,11 @@ import httpx
 logger = logging.getLogger(__name__)
 _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
-_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
+_PRIVATE_NETWORKS = (
-                     "172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
+    ipaddress.ip_network("10.0.0.0/8"),
-                     "172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
+    ipaddress.ip_network("172.16.0.0/12"),
-                     "172.30.", "172.31.", "192.168.")
+    ipaddress.ip_network("192.168.0.0/16"),
 )
 # Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
 # A bare "100." prefix would classify public addresses (e.g. AWS ranges
@@ -36,6 +37,14 @@ def _in_tailscale_range(host: str) -> bool:
        return False
 def _is_private_ip_literal(host: str) -> bool:
    try:
        ip = ipaddress.ip_address(host)
    except ValueError:
        return False
    return any(ip in network for network in _PRIVATE_NETWORKS)
 def _normalize_base_for_compare(url: str) -> str:
    url = (url or "").strip().rstrip("/")
    for suffix in ("/chat/completions", "/models", "/completions", "/v1/messages"):
@@ -87,7 +96,7 @@ def is_local_endpoint(url: str) -> bool:
        return True
    try:
        host = urlparse(url).hostname or ""
-        return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
+        return host in _LOCAL_HOSTS or _is_private_ip_literal(host) or _in_tailscale_range(host)
    except Exception:
        return False
@@ -322,6 +322,47 @@ class PersonalDocsManager:
        else:
            logger.info(f"Directory not in index: {directory}")
    def rename_directory(self, old_directory: str, new_directory: str, *, path_map: Dict[str, str] = None):
        """Re-point tracked directories and excluded files after an owner rename.

        Every stored path is made absolute and then translated: an exact hit in
        ``path_map`` wins, otherwise a path equal to or nested under
        ``old_directory`` is moved under ``new_directory``. Each collection is
        saved only when at least one of its entries actually changed, and the
        index is refreshed once if either collection was saved.

        Args:
            old_directory: Directory the stored paths currently live under.
            new_directory: Directory that replaces ``old_directory``.
            path_map: Optional explicit old-path -> new-path overrides; keys
                and values are normalised with ``os.path.abspath``.
        """
        source_root = os.path.abspath(old_directory)
        target_root = os.path.abspath(new_directory)
        overrides = {}
        for src, dst in (path_map or {}).items():
            overrides[os.path.abspath(src)] = os.path.abspath(dst)
        def translate(path: str) -> str:
            absolute = os.path.abspath(path)
            override = overrides.get(absolute)
            if override:
                return override
            if absolute == source_root:
                return target_root
            # The separator guard keeps sibling names ("alice2") from matching "alice".
            if absolute.startswith(source_root + os.sep):
                return target_root + absolute[len(source_root):]
            return absolute
        # Directories: keep first-seen order, dropping duplicates the rewrite creates.
        dir_pairs = [(original, translate(original)) for original in self.indexed_directories]
        dirs_changed = any(moved != os.path.abspath(original) for original, moved in dir_pairs)
        if dirs_changed:
            self.indexed_directories = list(dict.fromkeys(moved for _, moved in dir_pairs))
            self.save_directories()
        # Excluded files: an unordered set, so duplicates collapse on their own.
        excluded_pairs = [(original, translate(original)) for original in self.excluded_files]
        excluded_changed = any(moved != os.path.abspath(original) for original, moved in excluded_pairs)
        if excluded_changed:
            self.excluded_files = {moved for _, moved in excluded_pairs}
            self._save_excluded()
        if dirs_changed or excluded_changed:
            self.refresh_index()
    def get_indexed_directories(self):
        """Get the list of all indexed directories."""
        return self.indexed_directories.copy()
@@ -50,6 +50,23 @@ def _generate_doc_id(text: str, owner: str = "") -> str:
    return f"doc_{hashlib.sha256(key.encode('utf-8')).hexdigest()[:16]}"
 def _rewrite_owner_path(value: str, path_map: Dict[str, str], path_prefixes: List[tuple]) -> str:
    if not isinstance(value, str) or not value:
        return value
    abs_value = os.path.abspath(value)
    mapped = path_map.get(abs_value)
    if mapped:
        return mapped
    for old_prefix, new_prefix in path_prefixes:
        old_abs = os.path.abspath(old_prefix)
        new_abs = os.path.abspath(new_prefix)
        if abs_value == old_abs:
            return new_abs
        if abs_value.startswith(old_abs + os.sep):
            return new_abs + abs_value[len(old_abs):]
    return value
 class VectorRAG:
    """RAG system using ChromaDB vector storage with hybrid search."""
@@ -250,6 +267,75 @@ class VectorRAG:
            "failed_count": len(docs) - len(valid),
        }
    def rename_owner(
        self,
        old_owner: str,
        new_owner: str,
        *,
        path_map: Optional[Dict[str, str]] = None,
        path_prefixes: Optional[List[tuple]] = None,
    ) -> Dict[str, Any]:
        """Rewrite existing RAG metadata after an auth username rename.

        For every collection returned by ``_collections_for_delete()``, chunks
        whose ``owner`` metadata equals ``old_owner`` are fetched, their owner
        is switched to ``new_owner``, and any ``source`` / ``directory`` paths
        they carry are translated via ``_rewrite_owner_path``.

        Args:
            old_owner: Current owner name; compared stripped and lower-cased.
            new_owner: Replacement owner name; stored stripped and lower-cased.
            path_map: Optional exact old-path -> new-path overrides.
            path_prefixes: Optional ``(old_prefix, new_prefix)`` pairs applied
                when no exact override matches.

        Returns:
            A dict with ``success``, ``updated_count`` and ``message``; once a
            scan has been attempted it also carries ``failed_count`` (one per
            lane whose scan failed, plus one per chunk whose update failed).
        """
        if not self.healthy:
            return {"success": False, "updated_count": 0, "message": "Collection not initialized"}
        old_owner = (old_owner or "").strip().lower()
        new_owner = (new_owner or "").strip().lower()
        if not old_owner or not new_owner or old_owner == new_owner:
            return {"success": True, "updated_count": 0, "message": "No owner rename needed"}
        path_map = {os.path.abspath(k): os.path.abspath(v) for k, v in (path_map or {}).items()}
        path_prefixes = path_prefixes or []
        updated_ids = set()
        failed_count = 0
        for lane_name, collection in self._collections_for_delete():
            try:
                results = collection.get(
                    where={"owner": old_owner},
                    include=["metadatas"],
                )
            except Exception as e:
                # Best effort: a failing lane is counted and skipped so the
                # remaining lanes still get renamed.
                logger.warning("rename_owner metadata scan failed in %s lane: %s", lane_name, e)
                failed_count += 1
                continue
            ids = results.get("ids") or []
            metadatas = results.get("metadatas") or []
            if not ids:
                continue
            new_metas = []
            selected_ids = []
            for doc_id, meta in zip(ids, metadatas):
                if not isinstance(meta, dict):
                    continue
                next_meta = dict(meta)
                if str(next_meta.get("owner", "")).strip().lower() == old_owner:
                    next_meta["owner"] = new_owner
                for key in ("source", "directory"):
                    # Only translate paths the chunk already has. Assigning
                    # unconditionally would add `source: None` /
                    # `directory: None` to chunks that never carried them.
                    if key in next_meta:
                        next_meta[key] = _rewrite_owner_path(next_meta[key], path_map, path_prefixes)
                selected_ids.append(doc_id)
                new_metas.append(next_meta)
            if not selected_ids:
                continue
            try:
                collection.update(ids=selected_ids, metadatas=new_metas)
                updated_ids.update(selected_ids)
            except Exception as e:
                logger.warning("rename_owner metadata update failed in %s lane: %s", lane_name, e)
                failed_count += len(selected_ids)
        success = failed_count == 0
        return {
            "success": success,
            "updated_count": len(updated_ids),
            "failed_count": failed_count,
            "message": f"Updated {len(updated_ids)} RAG chunk(s)",
        }
    # ------------------------------------------------------------------
    # Search — hybrid: vector similarity + keyword overlap
    # ------------------------------------------------------------------
@@ -9,6 +9,8 @@ import uuid
 from datetime import datetime, timedelta, timezone
 from typing import Any, Awaitable, Callable, Dict, Tuple
 from core.auth import RESERVED_USERNAMES
 logger = logging.getLogger(__name__)
@@ -17,6 +19,34 @@ def _utcnow() -> datetime:
    return datetime.now(timezone.utc).replace(tzinfo=None)
 # Shell/file tools a scheduled task's agent should be offered by default,
 # mirroring the chat agent (where these are on unless a privilege or global
 # setting turns them off). The RAG tool selector + ASSISTANT_ALWAYS_AVAILABLE
 # never include bash/python, so on a host with an empty/degraded tool-embedding
 # index a task could not run shell or Python even for an admin owner. Offering
 # them here is safe: stream_agent_loop's blocked_tools_for_owner() still strips
 # this whole group for non-admin multi-user owners, and only admits it for
 # admins and single-user (AUTH_ENABLED=false) deployments.
 TASK_DEFAULT_SHELL_TOOLS = frozenset([
    "bash",
    "python",
    "read_file",
    "write_file",
    "edit_file",
    "grep",
    "glob",
    "ls",
    "get_workspace",
 ])
 def compose_task_relevant_tools(rag_tools, assistant_always, disabled_tools):
    """Build the set of tools offered to a scheduled task's agent.

    Starts from the default shell/file group, adds the RAG-retrieved tools and
    the assistant's always-available tools, then drops every name listed in
    ``disabled_tools`` (skipped when that argument is empty or ``None``).
    Per-owner admin gating is not done here; it is applied later by
    stream_agent_loop (blocked_tools_for_owner).

    Returns a new ``set``; none of the inputs are modified.
    """
    offered = set(TASK_DEFAULT_SHELL_TOOLS)
    offered.update(rag_tools)
    offered.update(assistant_always)
    if not disabled_tools:
        return offered
    return offered.difference(disabled_tools)
 # ── Shared TTL cache (singleflight) ────────────────────────────────────────
 # Multiple scheduled tasks firing in the same minute often need the same
 # external data (Miniflux unreads, MCP tool snapshots, etc.). This cache
@@ -1389,15 +1419,28 @@ class TaskScheduler:
            time_str = _utcnow().strftime("%A, %B %d %Y, %H:%M UTC")
        system_prompt = f"Current time: {time_str}\n\n{system_prompt}"
-        # Compute tool filter from CrewMember.enabled_tools if set
+        # Compute the disabled-tools set: the crew's enabled_tools allowlist
-        disabled_tools = None
+        # (inverted) plus the operator's global disabled_tools setting. The
        # global list must be merged here — chat does the same merge before
        # entering the agent loop (routes/chat_routes.py) — otherwise an admin
        # or AUTH_ENABLED=false scheduled task would still see and call shell/
        # file tools after the operator disabled them globally, because the
        # prompt/schema/execution gates only enforce what is passed in.
        disabled_tools: set[str] = set()
        if crew and crew.enabled_tools:
            try:
                enabled = json.loads(crew.enabled_tools)
                if isinstance(enabled, list) and enabled:
                    from src.tool_index import BUILTIN_TOOL_DESCRIPTIONS
                    all_tools = set(BUILTIN_TOOL_DESCRIPTIONS.keys())
-                    disabled_tools = all_tools - set(enabled)
+                    disabled_tools |= all_tools - set(enabled)
            except Exception:
                pass
        try:
            from src.settings import get_setting
            _global_disabled = get_setting("disabled_tools", [])
            if isinstance(_global_disabled, list):
                disabled_tools.update(_global_disabled)
        except Exception:
            pass
@@ -1409,10 +1452,10 @@ class TaskScheduler:
            tool_idx = get_tool_index()
            if tool_idx:
                rag_tools = tool_idx.get_tools_for_query(task.prompt or "", k=8)
-                relevant_tools = (rag_tools | ASSISTANT_ALWAYS_AVAILABLE)
+                relevant_tools = compose_task_relevant_tools(
-                if disabled_tools:
+                    rag_tools, ASSISTANT_ALWAYS_AVAILABLE, disabled_tools
-                    relevant_tools -= disabled_tools
+                )
-                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available = {len(relevant_tools)} total for '{task.name}'")
+                logger.info(f"[assistant] RAG selected {len(rag_tools)} tools + {len(ASSISTANT_ALWAYS_AVAILABLE)} always-available + shell/file defaults = {len(relevant_tools)} total for '{task.name}'")
        except Exception as e:
            logger.warning(f"[assistant] RAG tool selection failed, using all: {e}")
@@ -1420,7 +1463,7 @@ class TaskScheduler:
        try:
            result = await self._run_agent_loop(
                endpoint_url, model, task, session_id,
-                system_prompt=system_prompt, disabled_tools=disabled_tools,
+                system_prompt=system_prompt, disabled_tools=disabled_tools or None,
                relevant_tools=relevant_tools,
            )
        except Exception as e:
@@ -2221,7 +2264,7 @@ class TaskScheduler:
        # check-ins seeded, which then double-fire alongside the human user's
        # check-ins. This was the root cause of the duplicate 'Morning check-in'
        # rows we had to manually clean up.
-        if not owner or owner in {"internal-tool", "api", "demo", "system"}:
+        if not owner or owner in RESERVED_USERNAMES:
            logger.info(f"ensure_assistant_defaults: skip synthetic owner {owner!r}")
            return
        from core.database import SessionLocal, CrewMember, ScheduledTask
@@ -323,6 +323,24 @@ _MCP_TOOL_MAP = {
    "web_fetch":      ("web_fetch",  "web_fetch"),
    "generate_image": ("image_gen",  "generate_image"),
 }
 _EMAIL_MCP_OWNER_ARG = "_odysseus_owner"
 def _parse_qualified_mcp_args(tool: str, content: str) -> tuple[Dict, Optional[str]]:
    raw = (content or "").strip()
    if not raw:
        return {}, None
    try:
        parsed = json.loads(raw)
    except (json.JSONDecodeError, TypeError):
        if tool.startswith("mcp__email__"):
            return {}, "Email MCP tool arguments must be a JSON object."
        return {}, None
    if not isinstance(parsed, dict):
        if tool.startswith("mcp__email__"):
            return {}, "Email MCP tool arguments must be a JSON object."
        return {}, None
    return parsed, None
 def _parse_generate_image(content: str) -> Dict:
@@ -453,6 +471,8 @@ async def _direct_fallback(
    tool: str,
    content: str,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
    session_id: Optional[str] = None,
    owner: Optional[str] = None,
 ) -> Optional[Dict]:
    _subproc_env = {
        **os.environ,
@@ -466,6 +486,8 @@ async def _direct_fallback(
        ctx = {
            "progress_cb": progress_cb,
            "subproc_env": _subproc_env,
            "session_id": session_id,
            "owner": owner,
        }
        from src.agent_tools import TOOL_HANDLERS
@@ -713,10 +735,13 @@ async def _execute_tool_block_impl(
            desc = f"bash (background): {short}"
            result = {
                "output": (
-                    f"Started background job `{rec['id']}`. It is running detached — "
+                    f"Started background job `{rec['id']}`. It is running detached; "
                    f"do NOT wait for it or poll it. You will be automatically re-invoked "
                    f"with its full output when it finishes. Continue with other work, or "
-                    f"end your turn now and resume when the result arrives."
+                    f"end your turn now and resume when the result arrives. If the user "
                    f"later asks to check progress or stop it, call the manage_bg_jobs "
                    f"tool yourself (output or kill); do not tell them to run a tool "
                    f"command, and do not surface raw tool syntax in your reply."
                ),
                "exit_code": 0,
                "bg_job_id": rec["id"],
@@ -737,6 +762,11 @@ async def _execute_tool_block_impl(
        desc = f"{tool}: {first_line}"
        result = await _direct_fallback(tool, content, progress_cb=progress_cb) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool == "manage_bg_jobs":
        # Inspect/kill detached `bash` jobs; needs session_id to scope to chat.
        desc = f"manage_bg_jobs: {content.split(chr(10))[0][:80]}"
        result = await _direct_fallback(tool, content, session_id=session_id, owner=owner) \
            or {"error": "manage_bg_jobs: execution failed", "exit_code": 1}
    elif tool in ("create_document", "update_document", "edit_document",
                  "suggest_document", "manage_documents"):
        desc = f"{tool}: {content.split(chr(10))[0][:80]}"
@@ -748,10 +778,24 @@ async def _execute_tool_block_impl(
        query = content.split("\n")[0].strip()
        desc = f"search_chats: {query[:80]}"
        result = await do_search_chats(query, owner=owner)
-    elif tool in ("chat_with_model", "create_session", "list_sessions",
+    elif tool in ("chat_with_model", "ask_teacher", "list_models"):
-                  "send_to_session", "pipeline",
+        # Migrated to the agent_tools registry (#3629): dispatched through
-                  "manage_session", "manage_memory", "list_models",
+        # TOOL_HANDLERS with the owner/session ctx these tools need, instead
-                  "ui_control", "ask_teacher"):
+        # of the legacy dispatch_ai_tool elif. The impls live in
        # src/agent_tools/model_interaction_tools.py.
        first_line = content.split(chr(10))[0].strip()[:60]
        desc = f"{tool}: {first_line}" if first_line else tool
        result = await _document_tool_dispatch(tool, content, session_id, owner) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool in ("create_session", "list_sessions", "send_to_session", "manage_session"):
        # Migrated to the agent_tools registry (#3629): dispatched through
        # TOOL_HANDLERS with the owner/session ctx these tools need. The impls
        # live in src/agent_tools/session_tools.py.
        first_line = content.split(chr(10))[0].strip()[:60]
        desc = f"{tool}: {first_line}" if first_line else tool
        result = await _document_tool_dispatch(tool, content, session_id, owner) \
            or {"error": f"{tool}: execution failed", "exit_code": 1}
    elif tool in ("pipeline", "manage_memory", "ui_control"):
        from src.ai_interaction import dispatch_ai_tool
        desc, result = await dispatch_ai_tool(tool, content, session_id, owner=owner)
    elif tool == "manage_tasks":
@@ -858,11 +902,14 @@ async def _execute_tool_block_impl(
        # MCP tool dispatch
        mcp = get_mcp_manager()
        if mcp:
            try:
                args = json.loads(content) if content.strip().startswith("{") else {}
            except (json.JSONDecodeError, TypeError):
                args = {}
            desc = f"mcp: {tool}"
            args, parse_error = _parse_qualified_mcp_args(tool, content)
            if parse_error:
                result = {"error": parse_error, "exit_code": 1}
            else:
                if tool.startswith("mcp__email__") and owner:
                    args = dict(args)
                    args[_EMAIL_MCP_OWNER_ARG] = owner
                result = await mcp.call_tool(tool, args)
        else:
            desc = f"mcp: {tool}"
@@ -12,12 +12,24 @@ import os
 import re
 from typing import Any, Dict, List, Optional
 from fastapi import HTTPException
 from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE
 from src.tool_utils import get_mcp_manager
 from core.constants import internal_api_base
 from routes._validators import validate_remote_host, validate_ssh_port
 logger = logging.getLogger(__name__)
 def _string_arg(value: Any) -> str:
    return "" if value is None else str(value).strip()
 def _validate_cookbook_ssh_target(remote_host: Any, ssh_port: Any = "") -> tuple[str, str]:
    """Normalise and validate an SSH host/port pair supplied to a cookbook tool.

    Each argument is coerced to a trimmed string, passed to its validator
    (``None`` standing in for an empty value), and a falsy validator result is
    mapped back to ``""``. Returns ``(remote, sport)``.

    NOTE(review): callers in this module wrap this call in
    ``except HTTPException``, so the validators presumably raise that on bad
    input - confirm against routes/_validators.
    """
    host_text = _string_arg(remote_host)
    remote = validate_remote_host(host_text if host_text else None)
    if not remote:
        remote = ""
    port_text = _string_arg(ssh_port)
    sport = validate_ssh_port(port_text if port_text else None)
    if not sport:
        sport = ""
    return remote, sport
 # ---------------------------------------------------------------------------
 # Active email state
 # ---------------------------------------------------------------------------
@@ -645,6 +657,137 @@ async def do_manage_endpoints(content: str, owner: Optional[str] = None) -> Dict
 # MCP server management tool
 # ---------------------------------------------------------------------------
 # Parallel to routes/cookbook_helpers._validate_serve_cmd but deliberately the
 # opposite policy: that gate guards an admin-only serve command and allows
 # interpreters (python3/etc) because model-serving needs them, whereas this is
 # the model/prompt-injection-reachable manage_mcp path, so interpreters and
 # runners are denied here.
 #
 # Commands that can execute arbitrary code regardless of their arguments. These
 # are NEVER accepted on the manage_mcp agent path, even if an operator lists one
 # in ODYSSEUS_MCP_ALLOWED_COMMANDS -- a stdio server that genuinely needs an
 # interpreter or package runner must be registered via the trusted admin route.
 _MCP_DENIED_COMMANDS = frozenset({
    # Shells and shell-like launchers.
    "sh", "bash", "zsh", "fish", "dash", "ksh", "csh", "tcsh", "ash", "busybox",
    "cmd", "command.com", "powershell", "pwsh",
    # Language interpreters, runtimes and REPLs.
    "python", "pypy", "node", "nodejs", "deno", "bun", "ruby", "jruby",
    "perl", "raku", "php", "lua", "luajit", "tclsh", "wish", "expect", "rscript",
    "groovy", "scala", "elixir", "erl", "iex", "java", "javac", "jshell", "jbang",
    "kotlin", "kotlinc", "dotnet", "mono", "swift", "osascript", "tsx", "ts-node",
    # Package runners and package managers.
    "npx", "bunx", "uvx", "pipx", "npm", "pnpm", "yarn", "pip", "uv",
    "gem", "cargo", "go", "bundle", "poetry", "conda", "mamba", "brew",
    "apt", "apt-get", "yum", "dnf", "pacman", "apk",
    # Wrappers that launch another program, remote/privileged execution,
    # build and container tools, and utilities that can run commands.
    "env", "xargs", "nohup", "setsid", "nice", "ionice", "time", "timeout",
    "watch", "stdbuf", "unbuffer", "script", "ssh", "scp", "sshpass", "sudo",
    "doas", "su", "make", "cmake", "docker", "podman", "kubectl", "find",
    "awk", "gawk", "sed", "vi", "vim", "nvim", "emacs", "ed", "tee", "eval",
 })
 # Argv flags that make even an allowlisted binary execute inline code.
 # _validate_mcp_command matches the short flags by prefix against the stripped
 # argument (case-sensitive), so glued forms such as "-cimport os" are caught as
 # well as the bare token. The long flags are matched against the lower-cased
 # argument, either exactly or in the glued "--flag=value" form.
 _MCP_CODE_EXEC_SHORT_FLAGS = ("-c", "-e", "-m")
 _MCP_CODE_EXEC_LONG_FLAGS = ("--eval", "--exec", "--print", "--module", "--command", "--require")
 # URL/URI scheme prefixes; an argument that starts with one of these
 # (compared lower-cased) is refused by _validate_mcp_command.
 _MCP_URL_SCHEMES = ("http://", "https://", "ftp://", "ftps://", "file://", "data:", "jar:", "blob:")
 # Shell metacharacters refused in command/args. Args are passed as an argv list
 # (no shell), but refusing these keeps the surface narrow and obvious.
 _MCP_SHELL_METACHARS = set(";|&$`><\n\r")
 # Env vars that let a child process load attacker-supplied code before main().
 # Names are stored upper-case; _validate_mcp_command strips and upper-cases
 # each supplied key before looking it up here.
 _MCP_DANGEROUS_ENV = frozenset({
    "LD_PRELOAD", "LD_LIBRARY_PATH", "LD_AUDIT", "DYLD_INSERT_LIBRARIES",
    "DYLD_LIBRARY_PATH", "DYLD_FRAMEWORK_PATH", "PYTHONPATH", "PYTHONSTARTUP",
    "PYTHONHOME", "PYTHONEXECUTABLE", "NODE_OPTIONS", "NODE_PATH", "BASH_ENV",
    "ENV", "SHELLOPTS", "PERL5LIB", "PERL5OPT", "RUBYOPT", "RUBYLIB", "GEM_PATH",
    "R_PROFILE", "R_HOME", "PATH", "IFS", "PROMPT_COMMAND",
 })
 def _mcp_allowed_commands() -> set:
    """Operator-configured allowlist of safe MCP launcher basenames for the agent
    path. Empty by default; set ODYSSEUS_MCP_ALLOWED_COMMANDS (comma-separated)
    to opt specific trusted binaries in. Denied commands are rejected even if
    listed here."""
    raw = os.environ.get("ODYSSEUS_MCP_ALLOWED_COMMANDS", "")
    return {c.strip().lower() for c in raw.split(",") if c.strip()}
 def _validate_mcp_command(command, args, env) -> Optional[str]:
    """Validate a model-supplied stdio MCP registration. Returns an error string
    if it must be rejected, else None.
    Closes the RCE where manage_mcp 'add' passed prompt-injection-controlled
    command/args/env straight to a subprocess spawn (issue #438): a payload
    smuggled into a skill description, memory entry, fetched page, or email body
    could register a stdio server running arbitrary code as the app UID.
    """
    if not isinstance(command, str) or not command.strip():
        return "command must be a non-empty string"
    command = command.strip()
    if "/" in command or "\\" in command:
        return "command must be a bare executable name, not a path"
    if any(ch in _MCP_SHELL_METACHARS for ch in command):
        return "command contains shell metacharacters"
    base = command.lower()
    if base.endswith(".exe") or base.endswith(".cmd") or base.endswith(".bat"):
        base = base.rsplit(".", 1)[0]
    # Canonicalize a trailing version suffix so versioned aliases collapse to the
    # family name (python3.11 -> python, node18 -> node, pip3 -> pip); both the
    # raw basename and the canonical form are denied, so an operator cannot
    # accidentally allowlist a runtime alias back into the path.
    canon = re.sub(r"[-_.]?\d+(?:\.\d+)*$", "", base)
    if base in _MCP_DENIED_COMMANDS or canon in _MCP_DENIED_COMMANDS:
        return (
            f"command '{command}' is not allowed on the agent MCP path: "
            "interpreters, runtimes, package runners, and shells can execute "
            "arbitrary code. Register such a server via the admin route instead."
        )
    if base not in _mcp_allowed_commands():
        return (
            f"command '{command}' is not in the MCP allowlist. Add it to "
            "ODYSSEUS_MCP_ALLOWED_COMMANDS if you trust it, or register the "
            "server via the admin route."
        )
    if args is not None:
        if isinstance(args, str):
            try:
                args = json.loads(args)
            except Exception:
                return "args must be a JSON list"
        if not isinstance(args, list):
            return "args must be a list"
        for a in args:
            if not isinstance(a, str):
                return "args must all be strings"
            s = a.strip()
            low = s.lower()
            if any(s == f or s.startswith(f) for f in _MCP_CODE_EXEC_SHORT_FLAGS):
                return f"arg '{a}' is a code-execution flag and is not allowed"
            if any(low == f or low.startswith(f + "=") for f in _MCP_CODE_EXEC_LONG_FLAGS):
                return f"arg '{a}' is a code-execution flag and is not allowed"
            if any(low.startswith(u) for u in _MCP_URL_SCHEMES):
                return f"arg '{a}' is a remote URL and is not allowed"
            if any(ch in _MCP_SHELL_METACHARS for ch in a):
                return f"arg '{a}' contains shell metacharacters"
    if env:
        if isinstance(env, str):
            try:
                env = json.loads(env)
            except Exception:
                return "env must be a JSON object"
        if not isinstance(env, dict):
            return "env must be an object"
        for k in env:
            if str(k).strip().upper() in _MCP_DANGEROUS_ENV:
                return f"env var '{k}' can inject code into the child process and is not allowed"
    return None
 async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
    """Manage MCP servers: list, add, delete, enable, disable, reconnect."""
    try:
@@ -684,6 +827,12 @@ async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict:
        env = args.get("env", {})
        if not name or not command:
            return {"error": "name and command are required", "exit_code": 1}
        # Validate BEFORE any DB write or spawn: a rejected registration must
        # leave no enabled row (which would otherwise auto-reconnect on restart)
        # and must not attempt a connection.
        _mcp_err = _validate_mcp_command(command, cmd_args, env)
        if _mcp_err:
            return {"error": f"manage_mcp: refused unsafe server registration: {_mcp_err}", "exit_code": 1}
        sid = str(_uuid.uuid4())[:8]
        db = SessionLocal()
        try:
@@ -2888,6 +3037,10 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "",
            break
    if remote:
        try:
            remote, sport = _validate_cookbook_ssh_target(remote, sport)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
        _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
        cmd = (
            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
@@ -2976,8 +3129,8 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
        tail = 400
    tail = max(20, min(tail, 4000))
    headers = _internal_headers()
-    remote = (args.get("remote_host") or args.get("host") or "").strip()
+    remote = _string_arg(args.get("remote_host") or args.get("host"))
-    sport = (args.get("ssh_port") or "").strip()
+    sport = _string_arg(args.get("ssh_port"))
    # Resolve host from cookbook state if caller didn't pass one — same
    # lookup _cookbook_kill_session uses.
    if not remote:
@@ -2995,6 +3148,12 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic
                    if not sport:
                        sport = t.get("sshPort") or ""
                    break
    if remote:
        try:
            remote, sport = _validate_cookbook_ssh_target(remote, sport)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
    # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
    # live tmux pane. The pane is what the user would see scrolling on
    # their screen — including the post-crash neofetch banner and the
@@ -3172,7 +3331,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
    except ValueError:
        return {"error": "Invalid JSON arguments", "exit_code": 1}
-    host = (args.get("host") or args.get("remote_host") or "").strip()
+    host = _string_arg(args.get("host") or args.get("remote_host"))
    sess = (args.get("tmux_session") or args.get("session_id") or "").strip()
    model = (args.get("model") or args.get("repo_id") or "").strip()
    port = args.get("port") or 8000
@@ -3183,6 +3342,12 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di
        return {"error": "tmux_session and model are required", "exit_code": 1}
    # Verify tmux session exists on the target host
    if host:
        try:
            host, _ = _validate_cookbook_ssh_target(host)
        except HTTPException as e:
            return {"error": str(getattr(e, "detail", e)), "exit_code": 1}
    headers = _internal_headers()
    if host:
        check = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)} 2>&1'"
@@ -94,6 +94,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
    "manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
    "manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
    "api_call": "Call a configured API integration by name (Home Assistant, Miniflux, Gitea, Linkding, Jellyfin, RSS reader, git forge, bookmark manager, smart home, or any other registered service). Make a GET/POST/PUT/PATCH/DELETE request to the integration's endpoint path, with an optional JSON body. Use whenever the user asks to query or control one of their connected integrations/services.",
    "manage_tokens": "API token management: list, create, or delete API access tokens.",
    "manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
    "manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
@@ -134,6 +135,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
    "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter=<keyword> lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.",
    "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.",
    "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.",
    "manage_bg_jobs": "Inspect and control detached background `bash` jobs (the ones started with a `#!bg` marker). action='list' shows this chat's jobs (id/status/age/command); action='output' returns a job's captured output so far (check on a long-running job, or re-read a finished one); action='kill' stops a runaway job by id. Use for 'is the background job done', 'check on that job', 'show the build output', 'kill the background job', 'stop the bg task'. output/kill need a job_id from list.",
 }
@@ -348,6 +350,12 @@ class ToolIndex:
            {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"},
        frozenset({"calendar", "event", "meeting", "schedule", "appointment"}):
            {"manage_calendar"},
        # Detached background `bash` jobs (#!bg): check on / read output / kill.
        frozenset({"background job", "background jobs", "bg job", "bg jobs",
                   "background task", "is the job done", "check the job",
                   "check on that job", "job output", "kill the job",
                   "kill the background", "stop the background", "running job"}):
            {"manage_bg_jobs"},
        frozenset({"note", "todo", "reminder", "remind", "checklist", "remember to"}):
            {"manage_notes"},
        # Chat/session management. "rename" alone maps to documents below, so a
@@ -414,6 +422,14 @@ class ToolIndex:
                   "my settings", "change setting", "change a setting", "set setting",
                   "preference", "preferences", "configure"}):
            {"manage_settings", "ui_control"},
        # API-integration intent → the api_call tool. Mirrors the agent-loop
        # "integrations" domain so api_call still surfaces on the retrieval and
        # keyword-fallback paths (not just the deterministic domain seed) when a
        # user names a connected service.
        frozenset({"api_call", "api call", "integration", "integrations",
                   "home assistant", "homeassistant", "miniflux", "gitea",
                   "linkding", "jellyfin"}):
            {"api_call"},
        # Managing EXISTING research in the Library — open/read/find/delete.
        frozenset({"my research", "the research", "research on", "open research",
                   "read research", "find research", "delete research",
@@ -175,6 +175,9 @@ _TOOL_NAME_MAP = {
    "notes": "manage_notes",
    "todo": "manage_notes",
    "todos": "manage_notes",
    "manage_bg_jobs": "manage_bg_jobs",
    "bg_jobs": "manage_bg_jobs",
    "background_jobs": "manage_bg_jobs",
 }
 _MISFENCED_WEB_TOOL_NAMES = {
@@ -1188,6 +1188,21 @@ FUNCTION_TOOL_SCHEMAS = [
            }
        }
    },
    # Function-calling schema for the manage_bg_jobs tool: list, read the
    # captured output of, or kill detached background `bash` jobs (those started
    # with the `#!bg` marker). `job_id` is only meaningful for output/kill and
    # comes from a prior action='list' call.
    # NOTE(review): the `action` description advertises "default: list", yet
    # `action` is listed under "required" — confirm which one the handler
    # actually relies on.
    {
        "type": "function",
        "function": {
            "name": "manage_bg_jobs",
            "description": "Inspect and control detached background `bash` jobs (started with the `#!bg` marker). action='list' shows this chat's jobs with id/status/age/command; action='output' returns a job's captured output so far (use for a still-running job, or to re-read a finished one); action='kill' terminates a runaway job's process tree instead of waiting out its max-runtime. output and kill need job_id from list.",
            "parameters": {
                "type": "object",
                "properties": {
                    "action": {"type": "string", "enum": ["list", "output", "kill"], "description": "list | output | kill (default: list)"},
                    "job_id": {"type": "string", "description": "Background job id (required for output/kill; from action='list')"},
                },
                "required": ["action"]
            }
        }
    },
 ]
@@ -1206,23 +1221,26 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock
        logger.error(f"Failed to parse function call arguments for {name}: {arguments}")
        return None
    tool_type = _TOOL_NAME_MAP.get(name, name)
    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    # Some models emit valid JSON that isn't an object (e.g. a bare array
-    # ["ls -la"], string, or number) as the function arguments. Every branch
+    # ["ls -la"], string, or number) as function arguments. Most local tools keep
-    # below assumes a dict and calls args.get(...), so a non-dict would raise
+    # the legacy empty-object coercion for stream robustness, but email MCP tools
-    # AttributeError and abort the whole agent stream. Coerce to {} instead.
+    # must fail closed so a malformed call cannot read the default mailbox.
    if not isinstance(args, dict):
        if tool_type.startswith("mcp__email__") or name in _BUILTIN_EMAIL_TOOLS:
            logger.warning(f"Non-object email function call arguments for {name}: {args!r}; rejecting")
            return None
        logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty")
        args = {}
    tool_type = _TOOL_NAME_MAP.get(name, name)
    # Allow MCP tools through (namespaced as mcp__serverid__toolname)
    if tool_type.startswith("mcp__"):
        content = json.dumps(args) if args else "{}"
        return ToolBlock(tool_type, content)
    # Email tools are implemented as MCP — route them to email
    _BUILTIN_EMAIL_TOOLS = {"list_email_accounts", "send_email", "list_emails", "read_email", "reply_to_email",
                            "archive_email", "delete_email", "mark_email_read", "bulk_email", "download_attachment"}
    if name in _BUILTIN_EMAIL_TOOLS:
        return ToolBlock(f"mcp__email__{name}", json.dumps(args) if args else "{}")
    if tool_type not in TOOL_TAGS:
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
 NON_ADMIN_BLOCKED_TOOLS = {
    "bash",
    "python",
    "manage_bg_jobs",
    "read_file",
    "write_file",
    "edit_file",
@@ -114,6 +115,8 @@ _PLAN_MODE_KNOWN_MUTATORS = {
    # Shell is never read-only-safe; block it explicitly so it stays out of plan
    # mode even if the schema list fails to load.
    "bash", "python",
    # Controls shell processes (kill); plan mode can't run bash anyway.
    "manage_bg_jobs",
 }
@@ -1913,7 +1913,7 @@
            <h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><rect x="3" y="11" width="18" height="11" rx="2" ry="2"/><path d="M7 11V7a5 5 0 0 1 10 0v4"/></svg>Change Password</h2>
            <div class="settings-col">
              <input id="settings-pw-current" type="password" placeholder="Current password" autocomplete="current-password" style="padding:6px 8px;background:var(--bg);border:1px solid var(--border);border-radius:4px;color:var(--fg);font-family:inherit;font-size:12px;">
-              <input id="settings-pw-new" type="password" placeholder="New password (min 8)" autocomplete="new-password" style="padding:6px 8px;background:var(--bg);border:1px solid var(--border);border-radius:4px;color:var(--fg);font-family:inherit;font-size:12px;">
+              <input id="settings-pw-new" type="password" placeholder="New password" autocomplete="new-password" style="padding:6px 8px;background:var(--bg);border:1px solid var(--border);border-radius:4px;color:var(--fg);font-family:inherit;font-size:12px;">
              <input id="settings-pw-confirm" type="password" placeholder="Confirm new password" autocomplete="new-password" style="padding:6px 8px;background:var(--bg);border:1px solid var(--border);border-radius:4px;color:var(--fg);font-family:inherit;font-size:12px;">
              <div class="settings-row" style="margin-top:2px;justify-content:flex-end;">
                <span id="settings-pw-msg" style="font-size:11px;margin-right:auto;"></span>
@@ -2049,7 +2049,7 @@
            <h2><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:5px;opacity:0.6"><path d="M16 21v-2a4 4 0 0 0-4-4H5a4 4 0 0 0-4 4v2"/><circle cx="8.5" cy="7" r="4"/><line x1="20" y1="8" x2="20" y2="14"/><line x1="23" y1="11" x2="17" y2="11"/></svg>Add User</h2>
            <div class="admin-add-form">
              <input id="adm-newUsername" type="text" placeholder="Username">
-              <input id="adm-newPassword" type="password" placeholder="Password (min 8)">
+              <input id="adm-newPassword" type="password" placeholder="Password">
              <div class="admin-switch-inline" title="Grant full admin access"><label class="admin-switch"><input type="checkbox" id="adm-newIsAdmin"><span class="admin-slider"></span></label> Admin</div>
            </div>
            <div class="settings-row" style="margin-top:6px;">
@@ -13,6 +13,7 @@ let modalEl = null;
 // the endpoints list can flash a glow on that row. Cleared once the
 // animation fires.
 let _recentlyAddedEpId = null;
 let _authPolicy = { password_min_length: 8, reserved_usernames: [] };
 function el(id) { return document.getElementById(id); }
 function esc(s) { return uiModule.esc(s); }
@@ -343,6 +344,15 @@ function initSignupToggle() {
 }
 function initAddUser() {
  fetch('/api/auth/policy', { credentials: 'same-origin' })
    .then(r => r.ok ? r.json() : null)
    .then(policy => {
      if (!policy) return;
      _authPolicy = policy;
      const admPw = el('adm-newPassword');
      if (admPw) admPw.placeholder = `Password (min ${policy.password_min_length})`;
    })
    .catch(() => {});
  el('adm-addBtn').addEventListener('click', async () => {
    const msg = el('adm-addMsg');
    msg.textContent = ''; msg.className = '';
@@ -350,7 +360,8 @@ function initAddUser() {
    const password = el('adm-newPassword').value;
    const is_admin = el('adm-newIsAdmin').checked;
    if (!username) { msg.textContent = 'Username required'; msg.className = 'admin-error'; return; }
-    if (password.length < 8) { msg.textContent = 'Password must be at least 8 characters'; msg.className = 'admin-error'; return; }
+    if (password.length < _authPolicy.password_min_length) { msg.textContent = `Password must be at least ${_authPolicy.password_min_length} characters`; msg.className = 'admin-error'; return; }
    if (_authPolicy.reserved_usernames.includes(username.toLowerCase())) { msg.textContent = 'This username is reserved'; msg.className = 'admin-error'; return; }
    el('adm-addBtn').disabled = true;
    try {
      const res = await fetch('/api/auth/users', { method: 'POST', credentials: 'same-origin', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ username, password, is_admin }) });
@@ -1745,7 +1756,6 @@ const TOOL_META = {
  manage_skills:     { name: 'Skills',           desc: 'Learn and use procedures',        cat: 'Knowledge',  ctx: '~200' },
  manage_rag:        { name: 'RAG / Docs',       desc: 'Query indexed documents',         cat: 'Knowledge',  ctx: '~150' },
  chat_with_model:   { name: 'Chat with Model',  desc: 'Talk to another AI model',        cat: 'Multi-Agent', ctx: '~200' },
  second_opinion:    { name: 'Second Opinion',   desc: 'Get another model\'s take',       cat: 'Multi-Agent', ctx: '~150' },
  pipeline:          { name: 'Pipeline',         desc: 'Multi-step AI workflows',         cat: 'Multi-Agent', ctx: '~200' },
  ask_teacher:       { name: 'Ask Teacher',      desc: 'Query a more capable model',      cat: 'Multi-Agent', ctx: '~150' },
  send_to_session:   { name: 'Send to Session',  desc: 'Send message to another chat',    cat: 'Sessions',   ctx: '~100' },
@@ -125,7 +125,7 @@ const TOOL_GROUPS = {
  'Knowledge': ['web_search', 'read_file', 'manage_memory', 'manage_rag', 'search_chats'],
  'Code': ['bash', 'python', 'write_file'],
  'Documents': ['create_document', 'edit_document', 'update_document', 'suggest_document'],
-  'AI & Models': ['chat_with_model', 'second_opinion', 'ask_teacher', 'pipeline', 'list_models', 'generate_image'],
+  'AI & Models': ['chat_with_model', 'ask_teacher', 'pipeline', 'list_models', 'generate_image'],
  'System': ['manage_session', 'manage_endpoints', 'manage_mcp', 'manage_settings', 'manage_skills', 'manage_webhooks', 'manage_tokens', 'manage_documents', 'create_session', 'list_sessions', 'send_to_session', 'ui_control'],
 };
@@ -413,7 +413,7 @@ function _calEventFg(ev) {
 // Returns '' for normal solid-color events.
 function _calItemBgStyle(ev) {
  if (!_isCalBgImage(ev.color)) return '';
-  const url = _calBgImageUrl(ev.color).replace(/'/g, "\\'");
+  const url = _calBgImageUrl(ev.color).replace(/'/g, "\\'").replace(/"/g, "%22");
  return `background-image: linear-gradient(color-mix(in srgb, var(--bg) 70%, transparent), color-mix(in srgb, var(--bg) 70%, transparent)), url('${url}'); background-size: cover; background-position: center;`;
 }
@@ -1260,7 +1260,7 @@ async function _renderWeek() {
      // events keep the original tinted treatment.
      let bgDecl;
      if (_isCalBgImage(ev.color)) {
-        const _url = _calBgImageUrl(ev.color).replace(/'/g, "\\'");
+        const _url = _calBgImageUrl(ev.color).replace(/'/g, "\\'").replace(/"/g, "%22");
        bgDecl = `background-image: linear-gradient(color-mix(in srgb, var(--bg) 55%, transparent), color-mix(in srgb, var(--bg) 55%, transparent)), url('${_url}'); background-size: cover; background-position: center;`;
      } else {
        bgDecl = `background:color-mix(in srgb, ${_calColor(ev)} 18%, var(--bg));`;
@@ -635,8 +635,8 @@ export function applyModelColor(roleEl, modelName) {
      popup.className = 'ctx-popup';
      let html = '<div style="font-weight:600;margin-bottom:6px;color:var(--fg);display:flex;align-items:center;gap:6px;">';
      if (logoHtml) html += '<span class="role-provider-logo" style="opacity:0.7">' + logoHtml + '</span>';
-      html += short + '</div>';
+      html += uiModule.esc(short) + '</div>';
-      html += '<div><span class="ctx-label">Model</span> ' + modelName.split('/').pop() + '</div>';
+      html += '<div><span class="ctx-label">Model</span> ' + uiModule.esc(modelName.split('/').pop()) + '</div>';
      // Provider = the serving endpoint, distinct from the model vendor/logo
      // (e.g. the same model via OpenRouter vs Copilot vs Anthropic direct).
      const _epUrl = (window.sessionModule && window.sessionModule.getCurrentEndpointUrl)
@@ -56,7 +56,7 @@ const _RECIPES = [
    match: () => true,
    variants: {
      pip:    { commands: ['CMAKE_ARGS="-DGGML_CUDA=on" uv pip install -U "llama-cpp-python[server]"'] },
-      docker: { commands: ['docker pull ghcr.io/ggerganov/llama.cpp:server-cuda'] },
+      docker: { commands: ['docker pull ghcr.io/ggml-org/llama.cpp:server-cuda'] },
    },
  },
 ];
@@ -578,24 +578,50 @@ export const ERROR_PATTERNS = [
    ],
  },
  {
-    // Tail-only + healthy-server suppression. tmux capture-pane returns the
+    // Dependency-install (pip) build failure — a required package failed to
-    // entire scrollback every poll, so a one-shot startup traceback would
+    // build its wheel (common when an old sdist's setup.py breaks on a newer
-    // otherwise stick on the panel forever even while the server happily
+    // Python, e.g. basicsr on 3.13). This is an install problem, NOT a serve
-    // serves /v1/models. Only fire if the traceback is in recent output AND
+    // problem, so it must never suggest killing vLLM.
-    // the server isn't currently logging healthy traffic.
+    match: (text) => {
      const TAIL = text.slice(-6000);
      // A serve script can run a fallback build and then start serving fine —
      // don't flag a stale build error once the server is up.
      if (/Application startup complete|"(?:GET|POST)\s+\/v1\/[^"]+ HTTP\/[\d.]+"\s*2\d\d|Uvicorn running on|server is listening on https?:\/\//i.test(TAIL)) return false;
      return /Failed to build\b|subprocess-exited-with-error|Could not build wheels|metadata-generation-failed/i.test(TAIL);
    },
    message: 'A dependency failed to build during install — usually an older package whose build breaks on this Python version, not a server problem. The install did not finish.',
    suggestion: 'Suggested action: check the captured output for the package that failed to build; it may need a newer release or a patch to install on this Python version.',
    fixes: [],
  },
  {
    // vLLM-specific traceback: only offer the kill-processes recovery when the
    // output is actually about vLLM. Tail-only + healthy-server suppression so
    // a one-shot startup traceback doesn't stick on the panel forever while
    // the server happily serves /v1/models.
    match: (text) => {
      const TAIL = text.slice(-4096);
      if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
      // Healthy markers in the tail mean whatever blew up has been recovered
      // from — the server is up and answering requests.
      if (/Application startup complete|"GET \/v1\/[^"]+ HTTP\/[\d.]+" 2\d\d|Uvicorn running on/i.test(TAIL)) return false;
-      return true;
+      return /vllm/i.test(TAIL);
    },
-    message: 'Python traceback detected — may be a handled error, check logs.',
+    message: 'A vLLM process hit a Python traceback and may be wedged.',
    fixes: [
      { label: 'Kill vLLM processes', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
    ],
  },
  {
    // Generic traceback (not vLLM, not a pip build): surface it without
    // suggesting an unrelated vLLM kill. Offers no quick-fix buttons.
    //
    // match(text) -> boolean: true when the last 4096 characters of the
    // captured output contain a Python traceback and no sign that a server is
    // currently up. Only the tail is inspected so a one-shot startup traceback
    // does not stay flagged once newer output has pushed it out of range.
    match: (text) => {
      const TAIL = text.slice(-4096);
      if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
      // Healthy markers in the tail mean whatever blew up has been recovered
      // from. Accept POST as well as GET /v1 access lines and the
      // "server is listening on" startup line — the same markers the
      // pip-build pattern uses — so a server that only logs
      // POST /v1/chat/completions 2xx traffic is not reported as broken.
      if (/Application startup complete|"(?:GET|POST)\s+\/v1\/[^"]+ HTTP\/[\d.]+"\s*2\d\d|Uvicorn running on|server is listening on https?:\/\//i.test(TAIL)) return false;
      return true;
    },
    message: 'Python traceback detected — check the captured output below for the underlying error.',
    suggestion: 'Suggested action: read the captured output for the failing step; copy the troubleshooting bundle if you need help.',
    fixes: [],
  },
 ];
 export function _diagnose(text) {
@@ -757,7 +783,7 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
      });
      row.appendChild(btn);
    }
-    body.appendChild(row);
+    diag.appendChild(row);
  }
 }
@@ -2462,10 +2462,13 @@ export async function open(opts) {
  // returned before hydration — and since close/reopen doesn't reset the page,
  // only a full reload recovered it. Re-rendering is cheap and the in-progress
  // Running tab is rendered separately just below.
-  _renderRecipes();
+  // Guard the render passes: a single broken task card must not throw out of
  // open() and leave the modal stuck hidden (it has no catch, so the panel
  // would silently never appear). Show the window regardless; log and move on.
  try { _renderRecipes(); } catch (e) { console.error('[cookbook] renderRecipes failed', e); }
  _rendered = true;
  _clearCookbookNotif();
-  _renderRunningTab();
+  try { _renderRunningTab(); } catch (e) { console.error('[cookbook] renderRunningTab failed', e); }
  // Self-heal: revive any download tasks whose tmux session is still alive
  // but were persisted as done/error (covers the "restarted server while a
  // big multi-shard download was in flight" case — the task survived in
@@ -784,40 +784,47 @@ function _winSessionCmd(task, tmuxArgs) {
    const ps = host
      ? `Get-Content '${sd}\\${sid}.log' -Tail ${lines} -ErrorAction SilentlyContinue`
      : `Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.log') -Tail ${lines} -ErrorAction SilentlyContinue`;
-    return host ? `ssh ${pf}${host} "powershell -Command \\"${ps}\\""` : `powershell -Command "${ps}"`;
+    return _winPowerShellCmd(task, ps);
  }
  if (tmuxArgs.includes('has-session')) {
    const ps = host
      ? `$p = Get-Content '${sd}\\${sid}.pid' -ErrorAction SilentlyContinue; if ($p) { Get-Process -Id $p -ErrorAction SilentlyContinue | Out-Null; if ($?) { exit 0 } else { exit 1 } } else { exit 1 }`
      : `$p = Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.pid') -ErrorAction SilentlyContinue; if ($p) { Get-Process -Id $p -ErrorAction SilentlyContinue | Out-Null; if ($?) { exit 0 } else { exit 1 } } else { exit 1 }`;
-    return host ? `ssh ${pf}${host} "powershell -Command \\"${ps}\\""` : `powershell -Command "${ps}"`;
+    return _winPowerShellCmd(task, ps);
  }
  if (tmuxArgs.includes('kill-session')) {
-    const stopTree = `function Stop-Tree([int]$Id) { Get-CimInstance Win32_Process -Filter "ParentProcessId = $Id" -ErrorAction SilentlyContinue | ForEach-Object { Stop-Tree ([int]$_.ProcessId) }; Stop-Process -Id $Id -Force -ErrorAction SilentlyContinue }`;
+    const ps = _winSessionStopTreePs(task);
-    const ps = host
+    return _winPowerShellCmd(task, ps);
      ? `${stopTree}; $p = Get-Content '${sd}\\${sid}.pid' -ErrorAction SilentlyContinue; if ($p -match '^\\d+$') { Stop-Tree ([int]$p) }; Remove-Item '${sd}\\${sid}.*' -Force -ErrorAction SilentlyContinue`
      : `${stopTree}; $p = Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.pid') -ErrorAction SilentlyContinue; if ($p -match '^\\d+$') { Stop-Tree ([int]$p) }; Remove-Item (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.*') -Force -ErrorAction SilentlyContinue`;
    return host ? `ssh ${pf}${host} "powershell -Command \\"${ps}\\""` : `powershell -Command "${ps}"`;
  }
  if (tmuxArgs.includes('send-keys') && tmuxArgs.includes('C-c')) {
    const ps = host
      ? `$p = Get-Content '${sd}\\${sid}.pid' -ErrorAction SilentlyContinue; if ($p) { Stop-Process -Id $p -ErrorAction SilentlyContinue }`
      : `$p = Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.pid') -ErrorAction SilentlyContinue; if ($p) { Stop-Process -Id $p -ErrorAction SilentlyContinue }`;
-    return host ? `ssh ${pf}${host} "powershell -Command \\"${ps}\\""` : `powershell -Command "${ps}"`;
+    return _winPowerShellCmd(task, ps);
  }
  return host ? `ssh ${pf}${host} 'tmux ${tmuxArgs}' 2>/dev/null` : `tmux ${tmuxArgs} 2>/dev/null`;
 }
-export function _tmuxGracefulKill(task) {
+function _winPowerShellCmd(task, ps) {
-  if (_isWindows(task)) {
+  const command = `powershell -Command "${ps}"`;
  if (!task.remoteHost) return command;
  return `ssh ${_sshPrefix(_getPort(task))}${task.remoteHost} ${_shQuote(command)}`;
 }
 function _winSessionStopTreePs(task) {
  const host = task.remoteHost;
  const sd = host ? '$env:TEMP\\odysseus-sessions' : '$env:TEMP\\odysseus-tmux';
  const sid = task.sessionId;
-    const pf = _sshPrefix(_getPort(task));
+  const stopTree = `function Stop-Tree([int]$Id) { Get-CimInstance Win32_Process -Filter ('ParentProcessId = ' + $Id) -ErrorAction SilentlyContinue | ForEach-Object { Stop-Tree ([int]$_.ProcessId) }; Stop-Process -Id $Id -Force -ErrorAction SilentlyContinue }`;
-    const ps = host
+  return host
-      ? `$p = Get-Content '${sd}\\${sid}.pid' -ErrorAction SilentlyContinue; if ($p) { Stop-Process -Id $p -Force -ErrorAction SilentlyContinue }; Remove-Item '${sd}\\${sid}.*' -Force -ErrorAction SilentlyContinue`
+    ? `${stopTree}; $p = Get-Content '${sd}\\${sid}.pid' -ErrorAction SilentlyContinue; if ($p -match '^\\d+$') { Stop-Tree ([int]$p) }; Remove-Item '${sd}\\${sid}.*' -Force -ErrorAction SilentlyContinue`
-      : `$p = Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.pid') -ErrorAction SilentlyContinue; if ($p) { Stop-Process -Id $p -Force -ErrorAction SilentlyContinue }; Remove-Item (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.*') -Force -ErrorAction SilentlyContinue`;
+    : `${stopTree}; $p = Get-Content (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.pid') -ErrorAction SilentlyContinue; if ($p -match '^\\d+$') { Stop-Tree ([int]$p) }; Remove-Item (Join-Path $env:TEMP 'odysseus-tmux\\${sid}.*') -Force -ErrorAction SilentlyContinue`;
-    return host ? `ssh ${pf}${host} "powershell -Command \\"${ps}\\""` : `powershell -Command "${ps}"`;
+}
 export function _tmuxGracefulKill(task) {
  if (_isWindows(task)) {
    const ps = _winSessionStopTreePs(task);
    return _winPowerShellCmd(task, ps);
  }
  if (task.remoteHost) {
    return `ssh ${_sshPrefix(_getPort(task))}${task.remoteHost} 'tmux send-keys -t ${task.sessionId} C-c 2>/dev/null; sleep 2; tmux kill-session -t ${task.sessionId} 2>/dev/null'`;
@@ -116,13 +116,28 @@ function _selectedServeTarget(panel) {
    : (server?.name || 'local server');
  return {
    host,
-    port: host ? (_getPort(host) || server?.port || '') : '',
+    port: host ? (server?.port || _getPort(host) || '') : '',
    env: server?.env || '',
    venv,
    platform: server?.platform || _envState.platform || '',
    label,
  };
 }
 function _remoteWindowsDiffusersUnsupported(target) {
  // True only for a serve target that has a host set AND reports the
  // 'windows' platform. A missing/null target, an empty host (local), or any
  // other platform yields false. Always returns a strict boolean.
  const hasHost = Boolean(target?.host);
  return hasHost && target?.platform === 'windows';
 }
 function _backendChoicesForTarget(target) {
  // Returns the ordered list of [value, label] backend pairs offered for a
  // serve target. A fresh array is built on every call.
  //   - Windows target: llama.cpp, plus Diffusers unless
  //     _remoteWindowsDiffusersUnsupported(target) says otherwise.
  //   - Otherwise, when _isMetal() is true: llama.cpp and Ollama only.
  //   - Otherwise: vLLM, SGLang, llama.cpp, Ollama, Diffusers.
  const llamacpp = ['llamacpp','llama.cpp'];
  const diffusers = ['diffusers','Diffusers'];
  if (target?.platform === 'windows') {
    return _remoteWindowsDiffusersUnsupported(target)
      ? [llamacpp]
      : [llamacpp, diffusers];
  }
  const ollama = ['ollama','Ollama'];
  if (_isMetal()) return [llamacpp, ollama];
  return [['vllm','vLLM'], ['sglang','SGLang'], llamacpp, ollama, diffusers];
 }
 async function _fetchServeRuntimePackage(panel, backend) {
  const packageByBackend = {
    vllm: 'vllm',
@@ -529,13 +544,14 @@ function _rerenderCachedModels() {
      const ss = (_byRepo[repo] && typeof _byRepo[repo] === 'object')
        ? _byRepo[repo]
        : (_lastUsed || (_isLegacyFlat ? _allSs : {}));
      const _serveTarget = _selectedServeTarget();
      const _backendChoices = _backendChoicesForTarget(_serveTarget);
      const _allowedBackends = new Set(_backendChoices.map(([v]) => v));
      const detectedBackend = _detectBackend(m).backend;
-      const _allowedBackends = new Set(_isWindows()
+      let defaultBackend = (ss._forceBackend && ss.backend && _allowedBackends.has(ss.backend))
        ? ['llamacpp', 'diffusers']
        : (_isMetal() ? ['llamacpp', 'ollama'] : ['vllm', 'sglang', 'llamacpp', 'ollama', 'diffusers']));
      const defaultBackend = (ss._forceBackend && ss.backend && _allowedBackends.has(ss.backend))
        ? ss.backend
        : detectedBackend;
      if (!_allowedBackends.has(defaultBackend)) defaultBackend = _backendChoices[0]?.[0] || detectedBackend;
      const savedMatchesBackend = !!ss._forceBackend || (ss.backend || 'vllm') === detectedBackend;
      const sv = (k, def) => (ss[k] !== undefined && savedMatchesBackend) ? ss[k] : def;
      const defaultTp = defaultBackend === 'llamacpp' ? '1' : sv('tp', '1');
@@ -607,12 +623,6 @@ function _rerenderCachedModels() {
      }
      // Row 1: Backend + Server + Env
      panelHtml += `<div class="hwfit-serve-row">`;
      const _backendChoices = _isWindows()
        ? [['llamacpp','llama.cpp'],['diffusers','Diffusers']]
        : _isMetal()
        // Diffusers (diffusion_server.py) is CUDA-only — omit it on Metal.
        ? [['llamacpp','llama.cpp'],['ollama','Ollama']]
        : [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']];
      const backendOpts = _backendChoices.map(([v,l]) => `<option value="${v}"${defaultBackend===v?' selected':''}>${l}</option>`).join('');
      // Custom Backend picker — native <select> can't host SVG inside
      // options, so we render a button + menu that show the backend logo
@@ -1971,6 +1981,12 @@ function _rerenderCachedModels() {
          else serveState[el.dataset.field] = el.value;
        });
        serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
        const launchTarget = _selectedServeTarget(panel);
        if (serveState.backend === 'diffusers' && _remoteWindowsDiffusersUnsupported(launchTarget)) {
          _restoreLaunchBtn();
          uiModule.showToast('Diffusers serving is not supported on remote Windows servers yet. Use local Windows or a Linux server.', 9000);
          return;
        }
        // Pre-launch: check our own task list for a serve already running
        // on this host. Offer to stop+launch as the default action — the
@@ -1979,7 +1995,7 @@ function _rerenderCachedModels() {
        // common case instantly without waiting for a network round-trip.
        try {
          const _runningMod = await import('./cookbookRunning.js');
-          const _hostStr = _envState.remoteHost || '';
+          const _hostStr = launchTarget.host || '';
          const _active = (_runningMod._loadTasks ? _runningMod._loadTasks() : []).filter(t =>
            t && t.type === 'serve'
            && (t.remoteHost || '') === _hostStr
@@ -2033,12 +2049,11 @@ function _rerenderCachedModels() {
          || (serveState.backend === 'diffusers');
        if (_needsGpu) {
          try {
-            const _probeHost = (_envState.remoteHost || '').trim();
+            const _probeHost = (launchTarget.host || '').trim();
            const _probeParams = new URLSearchParams();
            if (_probeHost) {
              _probeParams.set('host', _probeHost);
-              const _sp = (_serverByVal?.(_envState.remoteServerKey || _probeHost) || {}).port;
+              if (launchTarget.port) _probeParams.set('ssh_port', launchTarget.port);
              if (_sp) _probeParams.set('ssh_port', _sp);
            }
            const _probeRes = await fetch('/api/cookbook/gpus' + (_probeParams.toString() ? '?' + _probeParams : ''), { credentials: 'same-origin' });
            const _probeData = await _probeRes.json();
@@ -2071,10 +2086,10 @@ function _rerenderCachedModels() {
            || launchCmd.match(/OLLAMA_HOST=[^:\s]+:(\d{2,5})\b/);
          const _port = _portMatch ? _portMatch[1] : '';
          if (_port) {
-            const _portHost = (_envState.remoteHost || '').trim();
+            const _portHost = (launchTarget.host || '').trim();
            const _checkInner = `ss -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}' || netstat -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}'`;
            const _cmd = _portHost
-              ? `ss h ${_portHost} <<<"" 2>/dev/null; ssh -o ConnectTimeout=4 -o StrictHostKeyChecking=no ${_portHost} ${JSON.stringify(_checkInner)}`
+              ? `ssh -o ConnectTimeout=4 -o StrictHostKeyChecking=no ${_sshPrefix(launchTarget.port)}${_portHost} ${JSON.stringify(_checkInner)}`
              : _checkInner;
            const _res = await fetch('/api/shell/exec', {
              method: 'POST', credentials: 'same-origin',
@@ -2131,20 +2146,8 @@ function _rerenderCachedModels() {
        // Resolve the target host from the visible Server dropdown — the reliable
        // source. Relying on _envState.remoteHost silently sent serves to Local
        // when that value was stale/empty. Pass it explicitly to the launcher.
-        let serveHost = _envState.remoteHost || '';
+        let serveHost = launchTarget.host || '';
-        let _srvEnv = '', _srvEnvPath = '';
+        let _srvEnv = launchTarget.env || '', _srvEnvPath = launchTarget.venv || '';
        const _ssEl = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
        if (_ssEl && _ssEl.value != null) {
          if (_ssEl.value === 'local') serveHost = '';
          else {
            const _srv = _serverByVal?.(_ssEl.value) || _envState.servers[parseInt(_ssEl.value)];
            if (_srv) {
              serveHost = _srv.host;
              _srvEnv = _srv.env || '';
              _srvEnvPath = _srv.envPath || '';
            }
          }
        }
        // The venv field wins; otherwise fall back to the env configured for the
        // selected server in Settings, so the activation isn't silently dropped
        // when the field is left blank (the per-server venv wasn't being applied).
@@ -284,8 +284,8 @@ import * as Modals from './modalManager.js';
        ? langIcon(doc.language, 12, { style: 'opacity:0.65;flex-shrink:0;color:currentColor;margin-right:4px;' })
        : '';
      const langChip = `<span class="doc-tab-lang">${lic}</span>`;
-      html += `<div class="doc-tab${isActive ? ' active' : ''}" draggable="true" data-doc-id="${id}" title="${title}">
+      html += `<div class="doc-tab${isActive ? ' active' : ''}" draggable="true" data-doc-id="${id}" title="${_esc(title)}">
-        ${verChip}${langChip}<span class="doc-tab-title">${shortTitle}</span>
+        ${verChip}${langChip}<span class="doc-tab-title">${_esc(shortTitle)}</span>
        <button class="doc-tab-close" data-doc-id="${id}" title="Unlink from chat (kept in the Library)">&times;</button>
      </div>`;
    }
@@ -12,8 +12,8 @@ export function canvasCoords(e, canvas) {
  const rect = canvas.getBoundingClientRect();
  const scaleX = canvas.width / rect.width;
  const scaleY = canvas.height / rect.height;
-  const clientX = e.touches ? e.touches[0].clientX : e.clientX;
+  const clientX = e.touches && e.touches.length ? e.touches[0].clientX : e.clientX;
-  const clientY = e.touches ? e.touches[0].clientY : e.clientY;
+  const clientY = e.touches && e.touches.length ? e.touches[0].clientY : e.clientY;
  return {
    x: (clientX - rect.left) * scaleX,
    y: (clientY - rect.top) * scaleY,
@@ -28,6 +28,7 @@
 import { previewZoneAt, clearPreview, snapModalToZone } from './tileManager.js';
 import { suspendDock, resumeDock, clearRightDock, applyEdgeDock } from './modalSnap.js';
 import { dismissOrRemove } from './escMenuStack.js';
 import { nextToolWindowZ } from './toolWindowZOrder.js';
 const _state = new Map(); // id -> { restoreFn, closeFn, railBtnId, isMinimized, restoreMinHeight }
@@ -63,7 +64,14 @@ function _applyRememberedDock(id) {
 // those statics and bump on every bring-to-front.
 let _modalTopZ = 300;
 function _bringToFront(modal) {
-  if (modal) modal.style.setProperty('z-index', String(++_modalTopZ), 'important');
+  if (!modal) return;
  const z = nextToolWindowZ({
    exclude: modal,
    current: getComputedStyle(modal).zIndex,
    floor: _modalTopZ,
  });
  _modalTopZ = Math.max(_modalTopZ, z);
  modal.style.setProperty('z-index', String(z), 'important');
 }
 function _emitModalOpened(id, modal) {
@@ -10,6 +10,7 @@ import { attachColorPicker } from './colorPicker.js';
 import { makeWindowDraggable } from './windowDrag.js';
 import { snapModalToZone } from './tileManager.js';
 import { applyEdgeDock, clearDockSide } from './modalSnap.js';
 import { topToolWindowZ } from './toolWindowZOrder.js';
 const API_BASE = window.location.origin;
 let _open = false;
@@ -200,6 +201,23 @@ function _restoreNotesSidebarDock(pane) {
  applyEdgeDock(pane, 'right');
 }
 // Notes is not a `.modal`; its backdrop is the top-level stacking surface.
 // Thin local wrapper around the shared topToolWindowZ helper: forwards only
 // `exclude` and returns whatever z-index value the helper reports.
 function _topToolWindowZ(exclude = null) {
  const scanOptions = { exclude: exclude };
  return topToolWindowZ(scanOptions);
 }
 /**
  * Raise the Notes window one z-index step above the other tool windows and
  * announce it with an `odysseus:modal-opened` window event.
  *
  * The z-index is applied to the element with id `notes-pane-backdrop`
  * (falling back to the pane's parent element), not to the pane itself.
  *
  * @param {Element|null} pane Notes pane element; defaults to `#notes-pane`.
  *   Nothing happens when it is missing.
  */
 function _bringNotesToFront(pane = document.getElementById('notes-pane')) {
  if (pane) {
    const surface = document.getElementById('notes-pane-backdrop') || pane.parentElement;
    // The surface is excluded from the scan so it is not compared against itself.
    const raisedZ = 1 + _topToolWindowZ(surface);
    if (surface) {
      surface.style.setProperty('z-index', String(raisedZ), 'important');
    }
    try {
      const detail = { id: 'notes-panel', modal: pane };
      const openedEvent = new CustomEvent('odysseus:modal-opened', { detail });
      window.dispatchEvent(openedEvent);
    } catch (_) {
      // Best effort: a failure while dispatching the event is ignored.
    }
  }
 }
 function _loadPendingHighlights() {
  try { return new Set(JSON.parse(localStorage.getItem(REMINDER_PENDING_HIGHLIGHT_KEY) || '[]')); }
  catch { return new Set(); }
@@ -1096,7 +1114,10 @@ export async function refreshDueBadge(opts = {}) {
 // ---- Panel ----
 export function openPanel() {
-  if (_open) return;
+  if (_open) {
    _bringNotesToFront();
    return;
  }
  _open = true;
  _editingId = null;
  // Reset the search filter — the rebuilt pane's search input renders empty, so a
@@ -1192,6 +1213,7 @@ export function openPanel() {
  document.body.appendChild(backdrop);
  _wireNotesWindow(pane);
  _restoreNotesSidebarDock(pane);
  _bringNotesToFront(pane);
  // Events
  // (Close chevron removed — swipe down on mobile, tool-rail toggle on desktop.)
@@ -1202,6 +1224,9 @@ export function openPanel() {
  _wireNotesSwipeDismiss(pane.querySelector('.notes-mobile-grabber'), pane);
  _wireNotesSwipeDismiss(pane.querySelector('.notes-pane-header'), pane);
  pane.addEventListener('pointerdown', () => _bringNotesToFront(pane), true);
  pane.addEventListener('focusin', () => _bringNotesToFront(pane), true);
  const minBtn = document.getElementById('notes-minimize-btn');
  if (minBtn) minBtn.addEventListener('click', (e) => {
    e.preventDefault();
@@ -11,6 +11,7 @@ import { isAltGrEvent } from './platform.js';
 let initialized = false;
 let modalEl = null;
 let _authPolicy = { password_min_length: 8 };
 // Shorthand for document.getElementById: returns the element with the given id, or null.
 function el(id) {
  const node = document.getElementById(id);
  return node;
 }
 // Delegates to uiModule.esc and returns its result unchanged.
 function esc(s) {
  const result = uiModule.esc(s);
  return result;
 }
@@ -2160,6 +2161,16 @@ function initAccount() {
      }
    }).catch(() => {});
  // Update password placeholder and policy from server
  fetch('/api/auth/policy', { credentials: 'same-origin' })
    .then(r => r.ok ? r.json() : null)
    .then(policy => {
      if (!policy) return;
      _authPolicy = policy;
      const pwNew = el('settings-pw-new');
      if (pwNew) pwNew.placeholder = `New password (min ${policy.password_min_length})`;
    }).catch(() => {});
  // Change password
  const saveBtn = el('settings-pw-save');
  const msgEl = el('settings-pw-msg');
@@ -2170,7 +2181,7 @@ function initAccount() {
      const conf = el('settings-pw-confirm').value;
      msgEl.style.color = '';
      if (!cur || !nw) { msgEl.textContent = 'Fill in all fields'; msgEl.style.color = 'var(--red)'; return; }
-      if (nw.length < 8) { msgEl.textContent = 'Min 8 characters'; msgEl.style.color = 'var(--red)'; return; }
+      if (nw.length < _authPolicy.password_min_length) { msgEl.textContent = `Min ${_authPolicy.password_min_length} characters`; msgEl.style.color = 'var(--red)'; return; }
      if (nw !== conf) { msgEl.textContent = 'Passwords don\'t match'; msgEl.style.color = 'var(--red)'; return; }
      saveBtn.disabled = true;
      try {
@@ -0,0 +1,29 @@
 // Selector for the elements scanned when ranking tool windows by z-index.
 export const TOOL_WINDOW_SELECTOR = 'body > .modal, body > .research-overlay, body > .notes-pane-backdrop';
 /**
  * Find the highest computed z-index among the visible tool windows.
  *
  * @param {object} [options]
  * @param {*} [options.exclude=null] Element to leave out of the scan.
  * @param {*} [options.root=globalThis.document] Object whose
  *   `querySelectorAll` is used to find candidates.
  * @param {Function} [options.getStyle=globalThis.getComputedStyle] Returns
  *   an object with `display`, `visibility` and `zIndex` for an element.
  * @param {number} [options.floor=250] Lowest value that can be returned.
  * @returns {number} The largest parsed z-index found, never below `floor`.
  *   `floor` itself is returned when `root` or `getStyle` is unusable.
  */
 export function topToolWindowZ(options = {}) {
  const orDefault = (value, fallback) => (value === undefined ? fallback : value);
  const exclude = orDefault(options.exclude, null);
  const root = orDefault(options.root, globalThis.document);
  const getStyle = orDefault(options.getStyle, globalThis.getComputedStyle);
  const floor = orDefault(options.floor, 250);

  const canScan = Boolean(root)
    && typeof root.querySelectorAll === 'function'
    && typeof getStyle === 'function';
  if (!canScan) return floor;

  let highest = floor;
  const consider = (node) => {
    if (!node || node === exclude) return;
    // Windows flagged hidden or minimized via class do not count.
    const flaggedHidden = node.classList?.contains('hidden')
      || node.classList?.contains('modal-minimized');
    if (flaggedHidden) return;
    const computed = getStyle(node);
    const rendered = computed.display !== 'none' && computed.visibility !== 'hidden';
    if (!rendered) return;
    // A non-numeric z-index parses to NaN and is skipped.
    const parsed = parseInt(computed.zIndex, 10);
    if (Number.isFinite(parsed)) {
      highest = Math.max(highest, parsed);
    }
  };
  root.querySelectorAll(TOOL_WINDOW_SELECTOR).forEach(consider);
  return highest;
 }
 /**
  * Pick the z-index a window should use to sit on top of the other tool windows.
  *
  * @param {object} [options] Passed through unchanged to `topToolWindowZ`.
  * @param {*} [options.current=null] The window's present z-index (number or
  *   numeric string); anything that does not parse as an integer is ignored.
  * @returns {number} `current` when it already exceeds the value reported by
  *   `topToolWindowZ`, otherwise that value plus one.
  */
 export function nextToolWindowZ(options = {}) {
  const current = options.current === undefined ? null : options.current;
  const ceiling = topToolWindowZ(options);
  const ownZ = parseInt(current, 10);
  // Keep the existing value when it is already above the rest, so repeated
  // calls do not keep increasing it.
  const alreadyOnTop = Number.isFinite(ownZ) && ownZ > ceiling;
  return alreadyOnTop ? ownZ : ceiling + 1;
 }
@@ -8,6 +8,7 @@ import themeModule from './theme.js';
 import * as Modals from './modalManager.js';
 import spinnerModule from './spinner.js';
 import { registerMenuDismiss, dismissTopMenu, dismissOrRemove } from './escMenuStack.js';
 import { nextToolWindowZ, topToolWindowZ } from './toolWindowZOrder.js';
 let toastEl = null;
 let autoScrollEnabled = true;
@@ -1088,14 +1089,22 @@ if ('ontouchstart' in window) {
 // ---- Bring modal to front on click ----
 {
-  let topModalZ = 250;
+  const raiseModalToFront = (modal, floor = 250) => {
    const z = nextToolWindowZ({
      exclude: modal,
      current: getComputedStyle(modal).zIndex,
      floor,
    });
    modal.style.setProperty('z-index', String(z), 'important');
    return z;
  };
  document.addEventListener('mousedown', (e) => {
    const modalContent = e.target.closest('.modal-content');
    if (!modalContent) return;
    const modal = modalContent.closest('.modal');
    if (!modal) return;
-    topModalZ += 1;
+    raiseModalToFront(modal);
    modal.style.zIndex = topModalZ;
  });
  // Backdrop tap to close — delegated for all modals
@@ -1190,9 +1199,15 @@ if (!window._odyEscExpandGuard) {
    // Re-entry guard: setting style.zIndex itself fires the observer that
    // calls us back. Skip if this element is already pinned to the top
    // (matches the current counter) so we don't spin into an infinite loop.
-    const cur = parseInt(m.style.zIndex, 10) || 0;
+    const cur = parseInt(getComputedStyle(m).zIndex, 10) || 0;
-    if (cur === _zCounter) return;
+    if (cur === _zCounter && cur > topToolWindowZ({ exclude: m })) return;
-    m.style.zIndex = String(++_zCounter);
+    const z = nextToolWindowZ({
      exclude: m,
      current: cur,
      floor: _zCounter,
    });
    _zCounter = Math.max(_zCounter, z);
    if (z !== cur) m.style.setProperty('z-index', String(z), 'important');
  };
  new MutationObserver((muts) => {
    for (const m of muts) {
@@ -328,6 +328,7 @@
  let mode = 'login'; // 'login' | 'signup' | 'setup'
  let signupAllowed = false;
  let policy = { password_min_length: 8, reserved_usernames: [] };
  const rememberToggle = document.getElementById('rememberToggle');
@@ -360,10 +361,12 @@
    }
  }
-  // Check auth status
+  // Check auth status and fetch policy in parallel, but don't block the
  // authenticated redirect on the policy response.
  const policyPromise = fetch('/api/auth/policy', { credentials: 'same-origin' }).catch(() => null);
  try {
-    const res = await fetch('/api/auth/status', { credentials: 'same-origin' });
+    const statusRes = await fetch('/api/auth/status', { credentials: 'same-origin' });
-    const data = await res.json();
+    const data = await statusRes.json();
    if (data.authenticated) {
      window.location.replace('/');
      return;
@@ -374,6 +377,10 @@
    } else {
      setMode('login');
    }
    const policyRes = await policyPromise;
    if (policyRes && policyRes.ok) {
      policy = await policyRes.json();
    }
  } catch (e) {
    setMode('login');
  }
@@ -426,8 +433,14 @@
        submitBtn.disabled = false;
        return;
      }
-      if (password.length < 8) {
+      if (password.length < policy.password_min_length) {
-        errEl.textContent = 'Password must be at least 8 characters';
+        errEl.textContent = `Password must be at least ${policy.password_min_length} characters`;
        errEl.style.display = 'block';
        submitBtn.disabled = false;
        return;
      }
      if (policy.reserved_usernames.includes(username.toLowerCase())) {
        errEl.textContent = 'This username is reserved';
        errEl.style.display = 'block';
        submitBtn.disabled = false;
        return;
@@ -119,7 +119,7 @@ Read-only checks, run from the repo root on this branch. Note the real API is
 ```bash
 # Compute the area_cli set and confirm test_backup_cli_security.py is
 # area_security. Expected: 28 files, then "security".
-.venv/bin/python - <<'PY'
+./venv/bin/python - <<'PY'
 from pathlib import Path
 from tests._taxonomy import classify_test_path
@@ -138,7 +138,7 @@ rg -n "TestClient|FastAPI|create_app|SessionLocal|sqlite|dependency_overrides" \
  tests/test_*cli*.py tests/test_sessions_cli.py
 # Hard-coded flat paths to the exact CLI files outside tests/. Expected: no matches.
-.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
+./venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
 from pathlib import Path
 from tests._taxonomy import classify_test_path
@@ -158,26 +158,26 @@ tokens only (plus the `tests/helpers/` directory rule), so the markers of the
 ## Validation for the future move PR
-Run with the project venv (`.venv/bin/python`); system `python3` may miss
+Run with the project venv (`./venv/bin/python`); system `python3` may miss
 pinned deps. Before the move, record the baseline; after, compare:
 ```bash
 # Selection must match the 28 files before and after the move.
-.venv/bin/python tests/run_focus.py --dry-run --area cli
+./venv/bin/python tests/run_focus.py --dry-run --area cli
-.venv/bin/python -m pytest -m area_cli -q
+./venv/bin/python -m pytest -m area_cli -q
 # Moved files pass when targeted directly.
-.venv/bin/python -m pytest tests/cli/ -q
+./venv/bin/python -m pytest tests/cli/ -q
 # Whole-suite collection still succeeds (catches import/path breakage).
-.venv/bin/python -m pytest --collect-only -q
+./venv/bin/python -m pytest --collect-only -q
 # Taxonomy/runner infrastructure is unaffected.
-.venv/bin/python -m pytest tests/test_taxonomy.py tests/test_run_focus.py -q
+./venv/bin/python -m pytest tests/test_taxonomy.py tests/test_run_focus.py -q
 # No stale flat-path references to the moved files. Expected: no matches
 # outside tests/cli/ itself.
-.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
+./venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
 from pathlib import Path
 from tests._taxonomy import classify_test_path
@@ -0,0 +1,326 @@
 # Oversized Test File Split Plan
 ## Purpose
 This document plans future oversized test-file splits using current repo data.
 It does not move files, rewrite assertions, extract helpers, or change CI.
 ## Roadmap context
 - Issue: #3983
 - Parent tracker: #2523
 - Follows #3973 / #3982, the report-only order-sensitivity diagnostics slice.
 ## Methodology
 Metrics were generated from the current test tree using:
 - physical line counts for every recursive `test_*.py` file under `tests/`;
 - AST counts for `test_*` functions and `Test*` classes;
 - one `pytest --collect-only -q tests` run to count collected items per file;
 - current taxonomy classification from `tests._taxonomy.classify_test_path`; and
 - static setup-signal scans for route/API, DB/session, import-state, security, filesystem, subprocess/script, async/threading, and UI/static indicators.
 Static signals are not proof of risk. They are review prompts.
 Future split PRs must still inspect each file manually before editing.
 ## Current summary
 - test files scanned: 583
 - collected pytest items counted: 3586
 - large-file threshold: 300 lines
 - large-collected threshold: 20 collected items
 Area distribution:
 | Value | Files |
 |---|---:|
 | cli | 28 |
 | helpers | 1 |
 | js | 39 |
 | routes | 23 |
 | security | 77 |
 | services | 144 |
 | uncategorized | 234 |
 | unit | 37 |
 Sub-area distribution:
 | Value | Files |
 |---|---:|
 | api | 6 |
 | atomic | 3 |
 | auth | 9 |
 | calendar | 10 |
 | cli | 28 |
 | confinement | 7 |
 | cookbook | 13 |
 | document | 11 |
 | email | 12 |
 | embedding | 3 |
 | gallery | 5 |
 | history | 3 |
 | js | 39 |
 | llm | 16 |
 | mcp | 8 |
 | memory | 15 |
 | nondict | 7 |
 | nonstring | 22 |
 | owner | 14 |
 | owner_scope | 23 |
 | parse | 4 |
 | provider | 6 |
 | research | 16 |
 | route | 6 |
 | routes | 9 |
 | scheduler | 3 |
 | scope | 5 |
 | security | 9 |
 | session | 16 |
 | ssrf | 3 |
 | webhook | 3 |
 | xss | 5 |
 Sub-area values with only one file each are omitted from the table above: 244 values covering 244 files.
 ## Top files by collected pytest items
 | File | Lines | Collected tests | Test defs | Test classes | Area | Sub-area | Signals |
 |---|---:|---:|---:|---:|---|---|---|
 | `tests/test_model_routes.py` | 1778 | 139 | 116 | 10 | routes | routes | route/api, db/session, import-state, async/threading |
 | `tests/test_security_regressions.py` | 1224 | 92 | 68 | 0 | security | security | route/api, db/session, import-state, security, filesystem, async/threading, ui/static |
 | `tests/test_provider_classification.py` | 188 | 67 | 21 | 4 | services | provider | - |
 | `tests/test_cookbook_helpers.py` | 912 | 65 | 65 | 0 | services | cookbook | route/api, filesystem, subprocess/script, async/threading, ui/static |
 | `tests/test_shell_routes.py` | 481 | 63 | 48 | 8 | routes | routes | route/api, import-state, filesystem |
 | `tests/test_pr_blocker_audit.py` | 964 | 58 | 58 | 0 | uncategorized | pr_blocker_audit | import-state, security, filesystem |
 | `tests/test_provider_endpoints.py` | 241 | 58 | 18 | 1 | services | provider | subprocess/script |
 | `tests/test_agent_loop.py` | 469 | 52 | 52 | 5 | uncategorized | agent_loop | db/session, import-state |
 | `tests/test_service_health.py` | 472 | 47 | 42 | 0 | uncategorized | service_health | async/threading |
 | `tests/test_run_focus.py` | 399 | 47 | 44 | 0 | uncategorized | run_focus | security, filesystem, subprocess/script, ui/static |
 | `tests/test_llm_core_temperature.py` | 196 | 41 | 17 | 0 | services | llm | - |
 | `tests/test_endpoint_probing.py` | 411 | 34 | 30 | 6 | uncategorized | endpoint_probing | route/api, db/session, import-state |
 | `tests/test_llm_core_anthropic_temp_omit.py` | 94 | 32 | 6 | 0 | services | llm | db/session |
 | `tests/test_chat_helpers.py` | 264 | 31 | 18 | 0 | uncategorized | chat_helpers | route/api |
 | `tests/test_provider_detection.py` | 148 | 31 | 31 | 5 | services | provider | - |
 | `tests/test_model_context.py` | 251 | 30 | 30 | 4 | uncategorized | model_context | db/session, import-state |
 | `tests/test_endpoint_resolver.py` | 148 | 30 | 30 | 6 | uncategorized | endpoint_resolver | - |
 | `tests/test_embedding_lanes.py` | 1104 | 29 | 29 | 0 | services | embedding | filesystem |
 | `tests/test_upload_limits_centralized.py` | 110 | 29 | 5 | 0 | uncategorized | upload_limits_centralized | import-state, filesystem |
 | `tests/test_email_oauth.py` | 580 | 28 | 25 | 0 | services | email | route/api, db/session, security, async/threading |
 | `tests/test_review_regressions.py` | 930 | 26 | 26 | 0 | uncategorized | review_regressions | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_rename_user_owner_sync.py` | 686 | 26 | 26 | 0 | security | owner | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_helpers_import_state.py` | 426 | 26 | 26 | 0 | helpers | helpers | route/api, db/session, import-state |
 | `tests/test_taxonomy.py` | 145 | 26 | 16 | 0 | uncategorized | taxonomy | security, ui/static |
 | `tests/test_tool_path_confinement.py` | 282 | 24 | 24 | 0 | security | confinement | import-state, filesystem, async/threading |
 | `tests/test_copilot.py` | 170 | 23 | 16 | 0 | uncategorized | copilot | - |
 | `tests/test_research_utils.py` | 97 | 23 | 23 | 2 | services | research | - |
 | `tests/test_api_chat_security.py` | 401 | 22 | 8 | 0 | security | security | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_tool_support_heuristic.py` | 166 | 22 | 22 | 3 | uncategorized | tool_support_heuristic | - |
 | `tests/test_platform_compat.py` | 318 | 21 | 21 | 0 | uncategorized | platform_compat | import-state, filesystem, subprocess/script |
 ## Top files by physical line count
 | File | Lines | Collected tests | Test defs | Test classes | Area | Sub-area | Signals |
 |---|---:|---:|---:|---:|---|---|---|
 | `tests/test_model_routes.py` | 1778 | 139 | 116 | 10 | routes | routes | route/api, db/session, import-state, async/threading |
 | `tests/test_security_regressions.py` | 1224 | 92 | 68 | 0 | security | security | route/api, db/session, import-state, security, filesystem, async/threading, ui/static |
 | `tests/test_embedding_lanes.py` | 1104 | 29 | 29 | 0 | services | embedding | filesystem |
 | `tests/test_pr_blocker_audit.py` | 964 | 58 | 58 | 0 | uncategorized | pr_blocker_audit | import-state, security, filesystem |
 | `tests/test_review_regressions.py` | 930 | 26 | 26 | 0 | uncategorized | review_regressions | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_cookbook_helpers.py` | 912 | 65 | 65 | 0 | services | cookbook | route/api, filesystem, subprocess/script, async/threading, ui/static |
 | `tests/test_rename_user_owner_sync.py` | 686 | 26 | 26 | 0 | security | owner | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_email_oauth.py` | 580 | 28 | 25 | 0 | services | email | route/api, db/session, security, async/threading |
 | `tests/test_api_token_routes.py` | 578 | 17 | 17 | 0 | routes | api_routes | route/api, db/session, import-state, async/threading |
 | `tests/test_shell_routes.py` | 481 | 63 | 48 | 8 | routes | routes | route/api, import-state, filesystem |
 | `tests/test_email_owner_scope.py` | 474 | 9 | 9 | 0 | security | owner_scope | route/api, db/session, filesystem, async/threading |
 | `tests/test_service_health.py` | 472 | 47 | 42 | 0 | uncategorized | service_health | async/threading |
 | `tests/test_agent_loop.py` | 469 | 52 | 52 | 5 | uncategorized | agent_loop | db/session, import-state |
 | `tests/test_kv_cache_invalidation_2927.py` | 463 | 8 | 8 | 0 | uncategorized | kv_cache_invalidation_2927 | route/api, db/session, import-state, async/threading |
 | `tests/test_helpers_import_state.py` | 426 | 26 | 26 | 0 | helpers | helpers | route/api, db/session, import-state |
 | `tests/test_endpoint_owner_scope_followup.py` | 414 | 11 | 11 | 0 | security | owner_scope | route/api, db/session, filesystem |
 | `tests/test_endpoint_probing.py` | 411 | 34 | 30 | 6 | uncategorized | endpoint_probing | route/api, db/session, import-state |
 | `tests/test_imap_leak_fixes.py` | 404 | 15 | 15 | 0 | uncategorized | imap_leak_fixes | route/api, db/session, security, filesystem |
 | `tests/test_companion_readonly.py` | 402 | 17 | 17 | 0 | uncategorized | companion_readonly | db/session, import-state |
 | `tests/test_api_chat_security.py` | 401 | 22 | 8 | 0 | security | security | route/api, db/session, import-state, filesystem, async/threading |
 | `tests/test_upload_handler_atomicity.py` | 401 | 9 | 9 | 0 | uncategorized | upload_handler_atomicity | filesystem, async/threading |
 | `tests/test_run_focus.py` | 399 | 47 | 44 | 0 | uncategorized | run_focus | security, filesystem, subprocess/script, ui/static |
 | `tests/test_auth_regressions.py` | 375 | 15 | 15 | 0 | security | auth | route/api, db/session, import-state, async/threading |
 | `tests/test_calendar_owner_scope.py` | 345 | 7 | 7 | 0 | security | owner_scope | route/api, db/session, import-state, filesystem, async/threading, ui/static |
 | `tests/test_null_owner_gates.py` | 342 | 20 | 20 | 0 | security | owner | route/api, db/session, import-state |
 | `tests/test_agent_migration_manifest.py` | 340 | 15 | 15 | 0 | uncategorized | agent_migration_manifest | import-state, filesystem |
 | `tests/test_calendar_recurrence.py` | 338 | 19 | 19 | 0 | services | calendar | - |
 | `tests/test_tool_policy.py` | 330 | 13 | 13 | 0 | uncategorized | tool_policy | import-state, async/threading |
 | `tests/test_workspace_confine.py` | 328 | 18 | 18 | 0 | uncategorized | workspace_confine | route/api, filesystem, subprocess/script, async/threading |
 | `tests/test_diffusion_server_security.py` | 325 | 14 | 14 | 0 | security | security | route/api, import-state, security, filesystem, async/threading, ui/static |
 ## Split planning candidates
 This section is generated from metrics, not from manual judgement.
 Files are included when they meet at least one threshold:
 - at least 300 physical lines; or
 - at least 20 collected pytest items.
 These are planning candidates only. A later split PR still needs a focused manual review of each file before moving tests.
 | File | Why included | Setup/risk signals | Suggested handling |
 |---|---|---|---|
 | `tests/test_model_routes.py` | 1778 lines, 139 collected tests | route/api, db/session, import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_security_regressions.py` | 1224 lines, 92 collected tests | route/api, db/session, import-state, security, filesystem, async/threading, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_provider_classification.py` | 67 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_cookbook_helpers.py` | 912 lines, 65 collected tests | route/api, filesystem, subprocess/script, async/threading, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_shell_routes.py` | 481 lines, 63 collected tests | route/api, import-state, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_pr_blocker_audit.py` | 964 lines, 58 collected tests | import-state, security, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_provider_endpoints.py` | 58 collected tests | subprocess/script | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_agent_loop.py` | 469 lines, 52 collected tests | db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_service_health.py` | 472 lines, 47 collected tests | async/threading | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_run_focus.py` | 399 lines, 47 collected tests | security, filesystem, subprocess/script, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_llm_core_temperature.py` | 41 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_endpoint_probing.py` | 411 lines, 34 collected tests | route/api, db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_llm_core_anthropic_temp_omit.py` | 32 collected tests | db/session | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_chat_helpers.py` | 31 collected tests | route/api | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_provider_detection.py` | 31 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_model_context.py` | 30 collected tests | db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_endpoint_resolver.py` | 30 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_embedding_lanes.py` | 1104 lines, 29 collected tests | filesystem | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_upload_limits_centralized.py` | 29 collected tests | import-state, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_email_oauth.py` | 580 lines, 28 collected tests | route/api, db/session, security, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_review_regressions.py` | 930 lines, 26 collected tests | route/api, db/session, import-state, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_rename_user_owner_sync.py` | 686 lines, 26 collected tests | route/api, db/session, import-state, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_helpers_import_state.py` | 426 lines, 26 collected tests | route/api, db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_taxonomy.py` | 26 collected tests | security, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_tool_path_confinement.py` | 24 collected tests | import-state, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_copilot.py` | 23 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_research_utils.py` | 23 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_api_chat_security.py` | 401 lines, 22 collected tests | route/api, db/session, import-state, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_tool_support_heuristic.py` | 22 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_platform_compat.py` | 318 lines, 21 collected tests | import-state, filesystem, subprocess/script | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_context_compactor.py` | 21 collected tests | db/session, import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_prompt_security.py` | 21 collected tests | No obvious setup signals from static scan. | Good first manual-review candidate if test themes are cohesive. |
 | `tests/test_null_owner_gates.py` | 342 lines, 20 collected tests | route/api, db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_youtube_handler_consolidation.py` | 20 collected tests | route/api, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_calendar_recurrence.py` | 338 lines | No obvious setup signals from static scan. | Plan split boundaries before editing. |
 | `tests/test_workspace_confine.py` | 328 lines | route/api, filesystem, subprocess/script, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_api_token_routes.py` | 578 lines | route/api, db/session, import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_companion_readonly.py` | 402 lines | db/session, import-state | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_set_admin.py` | 317 lines | route/api, import-state, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_imap_leak_fixes.py` | 404 lines | route/api, db/session, security, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_auth_regressions.py` | 375 lines | route/api, db/session, import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_agent_migration_manifest.py` | 340 lines | import-state, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_diffusion_server_security.py` | 325 lines | route/api, import-state, security, filesystem, async/threading, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_tool_policy.py` | 330 lines | import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_endpoint_owner_scope_followup.py` | 414 lines | route/api, db/session, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_upload_routes_owner_scope.py` | 315 lines | route/api, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_email_owner_scope.py` | 474 lines | route/api, db/session, filesystem, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_upload_handler_atomicity.py` | 401 lines | filesystem, async/threading | Plan split boundaries before editing. |
 | `tests/test_kv_cache_invalidation_2927.py` | 463 lines | route/api, db/session, import-state, async/threading | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_calendar_owner_scope.py` | 345 lines | route/api, db/session, import-state, filesystem, async/threading, ui/static | Defer mechanical split until setup/risk boundaries are mapped. |
 | `tests/test_skills_manager_owner_isolation.py` | 306 lines | import-state, filesystem | Defer mechanical split until setup/risk boundaries are mapped. |
 ## Taxonomy coverage gaps among split candidates
 `uncategorized` is a current taxonomy area, not a builder failure.
 This plan does not reclassify tests because taxonomy changes should be reviewed separately from oversized-file split planning.
 Before using any of these files as a split target, first decide whether the taxonomy should be refined in a separate focused issue/PR.
 | File | Lines | Collected tests | Sub-area | Signals | Suggested follow-up |
 |---|---:|---:|---|---|---|
 | `tests/test_pr_blocker_audit.py` | 964 | 58 | pr_blocker_audit | import-state, security, filesystem | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_agent_loop.py` | 469 | 52 | agent_loop | db/session, import-state | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_service_health.py` | 472 | 47 | service_health | async/threading | Review taxonomy mapping before using as a split target. |
 | `tests/test_run_focus.py` | 399 | 47 | run_focus | security, filesystem, subprocess/script, ui/static | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_endpoint_probing.py` | 411 | 34 | endpoint_probing | route/api, db/session, import-state | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_chat_helpers.py` | 264 | 31 | chat_helpers | route/api | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_model_context.py` | 251 | 30 | model_context | db/session, import-state | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_endpoint_resolver.py` | 148 | 30 | endpoint_resolver | - | Review taxonomy mapping before using as a split target. |
 | `tests/test_upload_limits_centralized.py` | 110 | 29 | upload_limits_centralized | import-state, filesystem | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_review_regressions.py` | 930 | 26 | review_regressions | route/api, db/session, import-state, filesystem, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_taxonomy.py` | 145 | 26 | taxonomy | security, ui/static | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_copilot.py` | 170 | 23 | copilot | - | Review taxonomy mapping before using as a split target. |
 | `tests/test_tool_support_heuristic.py` | 166 | 22 | tool_support_heuristic | - | Review taxonomy mapping before using as a split target. |
 | `tests/test_platform_compat.py` | 318 | 21 | platform_compat | import-state, filesystem, subprocess/script | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_context_compactor.py` | 233 | 21 | context_compactor | db/session, import-state, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_youtube_handler_consolidation.py` | 104 | 20 | youtube_handler_consolidation | route/api, import-state | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_workspace_confine.py` | 328 | 18 | workspace_confine | route/api, filesystem, subprocess/script, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_companion_readonly.py` | 402 | 17 | companion_readonly | db/session, import-state | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_set_admin.py` | 317 | 17 | set_admin | route/api, import-state, filesystem, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_imap_leak_fixes.py` | 404 | 15 | imap_leak_fixes | route/api, db/session, security, filesystem | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_agent_migration_manifest.py` | 340 | 15 | agent_migration_manifest | import-state, filesystem | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_tool_policy.py` | 330 | 13 | tool_policy | import-state, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 | `tests/test_upload_handler_atomicity.py` | 401 | 9 | upload_handler_atomicity | filesystem, async/threading | Review taxonomy mapping before using as a split target. |
 | `tests/test_kv_cache_invalidation_2927.py` | 463 | 8 | kv_cache_invalidation_2927 | route/api, db/session, import-state, async/threading | Review taxonomy and setup/risk boundaries before any split. |
 ## Suggested first manual-review candidates
 These are not automatic split approvals. They are categorized candidates with enough size/collection value and no route/API, DB/session, import-state, or security signal from the static scan.
 Files still in the `uncategorized` taxonomy area are listed separately below so taxonomy review does not get mixed into the first split decision.
 | File | Lines | Collected tests | Area | Sub-area | Signals | Why this is a candidate |
 |---|---:|---:|---|---|---|---|
 | `tests/test_provider_classification.py` | 188 | 67 | services | provider | - | 67 collected tests |
 | `tests/test_provider_endpoints.py` | 241 | 58 | services | provider | subprocess/script | 58 collected tests |
 | `tests/test_llm_core_temperature.py` | 196 | 41 | services | llm | - | 41 collected tests |
 | `tests/test_provider_detection.py` | 148 | 31 | services | provider | - | 31 collected tests |
 | `tests/test_embedding_lanes.py` | 1104 | 29 | services | embedding | filesystem | 1104 lines, 29 collected tests |
 | `tests/test_research_utils.py` | 97 | 23 | services | research | - | 23 collected tests |
 | `tests/test_prompt_security.py` | 203 | 21 | security | security | - | 21 collected tests |
 | `tests/test_calendar_recurrence.py` | 338 | 19 | services | calendar | - | 338 lines |
 ## High-risk candidates to defer first
 These files may still be split later, but not as the first implementation slice without a separate manual boundary review.
 | File | Lines | Collected tests | High-risk signals |
 |---|---:|---:|---|
 | `tests/test_model_routes.py` | 1778 | 139 | db/session, import-state, route/api |
 | `tests/test_security_regressions.py` | 1224 | 92 | db/session, import-state, route/api, security |
 | `tests/test_cookbook_helpers.py` | 912 | 65 | route/api |
 | `tests/test_shell_routes.py` | 481 | 63 | import-state, route/api |
 | `tests/test_pr_blocker_audit.py` | 964 | 58 | import-state, security |
 | `tests/test_agent_loop.py` | 469 | 52 | db/session, import-state |
 | `tests/test_run_focus.py` | 399 | 47 | security |
 | `tests/test_endpoint_probing.py` | 411 | 34 | db/session, import-state, route/api |
 | `tests/test_llm_core_anthropic_temp_omit.py` | 94 | 32 | db/session |
 | `tests/test_chat_helpers.py` | 264 | 31 | route/api |
 | `tests/test_model_context.py` | 251 | 30 | db/session, import-state |
 | `tests/test_upload_limits_centralized.py` | 110 | 29 | import-state |
 | `tests/test_email_oauth.py` | 580 | 28 | db/session, route/api, security |
 | `tests/test_review_regressions.py` | 930 | 26 | db/session, import-state, route/api |
 | `tests/test_rename_user_owner_sync.py` | 686 | 26 | db/session, import-state, route/api |
 ## Rules for future split PRs
 - One file or one coherent file-family per PR.
 - No assertion rewrites mixed with file moves.
 - No helper extraction mixed with file moves.
 - No production code changes.
 - No CI workflow changes.
 - Preserve existing markers and taxonomy unless the split issue explicitly says otherwise.
 - Validate the original file's collected tests before and after the split.
 - Validate any neighboring taxonomy/focused-runner behavior if paths change.
 - Treat files with route/API, DB/session, import-state, or security signals as higher-risk until manually reviewed.
 ## Suggested next step
 Use this plan to choose the first actual oversized-file split issue.
 The first split should prefer a file with high review value and low setup risk.
 Do not start a split PR from this planning issue alone if the file's boundaries are still ambiguous.
 ## Reproduction command
 This document was generated with:
 ```bash
 .venv/bin/python tests/tools/build_oversized_test_split_plan.py
 ```
 ## Freshness check
 After editing the builder or rebasing the branch, regenerate the plan and confirm no unexpected plan drift:
 ```bash
 .venv/bin/python tests/tools/build_oversized_test_split_plan.py
 git diff --exit-code -- tests/OVERSIZED_TEST_SPLIT_PLAN.md
 ```
@@ -22,8 +22,8 @@ markers only - it moves no files and changes no test behavior. Use them to run a
 focused slice:
 ```bash
-python3 -m pytest -m area_security
+./venv/bin/python -m pytest -m area_security
-python3 -m pytest -m "area_services and sub_cookbook"
+./venv/bin/python -m pytest -m "area_services and sub_cookbook"
 ```
 Areas are `security`, `routes`, `services`, `cli`, `js`, `helpers`, `unit`, and
@@ -38,13 +38,13 @@ sub-area names, accepts sub-areas with or without the `sub_` prefix, and passes
 extra pytest arguments after `--`:
 ```bash
-python3 tests/run_focus.py --area security
+./venv/bin/python tests/run_focus.py --area security
-python3 tests/run_focus.py --area services --sub-area cookbook
+./venv/bin/python tests/run_focus.py --area services --sub-area cookbook
-python3 tests/run_focus.py --sub-area sub_cookbook
+./venv/bin/python tests/run_focus.py --sub-area sub_cookbook
-python3 tests/run_focus.py --keyword taxonomy
+./venv/bin/python tests/run_focus.py --keyword taxonomy
-python3 tests/run_focus.py --last-failed
+./venv/bin/python tests/run_focus.py --last-failed
-python3 tests/run_focus.py --dry-run --area services --sub-area cookbook
+./venv/bin/python tests/run_focus.py --dry-run --area services --sub-area cookbook
-python3 tests/run_focus.py --area services -- --maxfail=1 -q
+./venv/bin/python tests/run_focus.py --area services -- --maxfail=1 -q
 ```
 ### Fast lane and duration visibility
@@ -61,15 +61,15 @@ so you can see where time goes. They are reporting only and do not count as a
 focus selector, so `--durations` must be combined with a real selector
 (`--area`, `--sub-area`, `--keyword`, `--last-failed`, or `--fast`).
-Activate or otherwise use the project Python environment before running these
+Use the project Python environment before running these commands. The examples
-commands. The examples use `python3` intentionally to avoid hard-coding a local
+use the repo's documented `./venv/bin/python` path so they do not accidentally
-venv path.
+fall back to system Python.
 ```bash
-python3 tests/run_focus.py --fast
+./venv/bin/python tests/run_focus.py --fast
-python3 tests/run_focus.py --area services --fast
+./venv/bin/python tests/run_focus.py --area services --fast
-python3 tests/run_focus.py --area services --durations 25
+./venv/bin/python tests/run_focus.py --area services --durations 25
-python3 tests/run_focus.py --area services --fast --durations 25 --durations-min 0.05
+./venv/bin/python tests/run_focus.py --area services --fast --durations 25 --durations-min 0.05
 ```
 The `slow` marker is opt-in. Mark a test `slow` only with duration evidence
@@ -79,8 +79,8 @@ replace the full suite before merge. A `slow` mark only excludes a test from the
 fast lane; the test stays runnable directly, e.g.:
 ```bash
-python3 -m pytest tests/test_auth_config_lock_concurrency.py
+./venv/bin/python -m pytest tests/test_auth_config_lock_concurrency.py
-python3 -m pytest -m slow
+./venv/bin/python -m pytest -m slow
 ```
 ## Order-sensitivity reporting (report-only)
@@ -93,8 +93,8 @@ ordering - the shuffle exists only inside this runner. The seed is always
 printed, and pytest targets/options go after a literal `--`:
 ```bash
-python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
+./venv/bin/python tests/run_order_report.py --seed 123 -- tests/cli/ -q
-python3 tests/run_order_report.py -- tests/cli/ -q   # generates and prints a seed
+./venv/bin/python tests/run_order_report.py -- tests/cli/ -q   # generates and prints a seed
 ```
 The same seed reproduces the same order when the reported working directory,
@@ -108,7 +108,7 @@ A generated-seed run starts with output like:
 [order-report] working directory: /path/to/odysseus
 [order-report] shuffling test order with seed 284734921
 [order-report] reproduce from this working directory with the same test environment:
-[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
+[order-report] reproduce with: /path/to/odysseus/venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
 ```
 Run the printed command from the reported working directory to reproduce the
@@ -118,7 +118,7 @@ same fixed-seed order:
 [order-report] working directory: /path/to/odysseus
 [order-report] shuffling test order with seed 284734921
 [order-report] reproduce from this working directory with the same test environment:
-[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
+[order-report] reproduce with: /path/to/odysseus/venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
 ```
 Pytest output remains visible between the report header and footer. A failing
@@ -237,10 +237,10 @@ helpers:
 Run validation locally before opening or approving a PR. Practical checks:
 - `git diff --check` - catch whitespace and conflict-marker errors.
- `python3 -m py_compile <changed files>` - confirm changed files compile.
+- `./venv/bin/python -m py_compile <changed files>` - confirm changed files compile.
- Focused `pytest` on the changed test files.
+- Focused `./venv/bin/python -m pytest` on the changed test files.
- `pytest` on neighboring or order-sensitive test groups that share import
+- `./venv/bin/python -m pytest` on neighboring or order-sensitive test groups
-  state with the changed files.
+  that share import state with the changed files.
 - `grep` for the old boilerplate when replacing it, to confirm no stragglers
  remain.
 - A fresh audit worktree when changing the helpers themselves, so stale
@@ -24,7 +24,7 @@ The goal is not only to reorganize `tests/`. The goal is for the suite to be a
 reliable foundation for future development: deterministic, modular, informative,
 behavior-focused, and complete enough to replace manual QA wherever practical.
-Run tests with the project virtualenv interpreter (`.venv/bin/python -m pytest`).
+Run tests with the project virtualenv interpreter (`./venv/bin/python -m pytest`).
 The system `python3` may be missing pinned dependencies (e.g. `nh3`), which
 shows up as import/collection errors that are environmental, not real failures.
@@ -172,10 +172,10 @@ Prefer tests that exercise real behavior over tests that inspect source code.
 Run locally before opening or approving a refactor PR:
 - `git diff --check` - whitespace and conflict-marker errors.
- `python3 -m py_compile <changed .py files>` - changed files compile.
+- `./venv/bin/python -m py_compile <changed .py files>` - changed files compile.
- Focused `pytest` on the changed files (use `.venv/bin/python -m pytest`).
+- Focused `./venv/bin/python -m pytest` on the changed files.
- `pytest` on neighboring / order-sensitive groups that share import state with
+- `./venv/bin/python -m pytest` on neighboring / order-sensitive groups that
-  the changed files.
+  share import state with the changed files.
 - When replacing boilerplate, `grep` for the old pattern to confirm no stragglers.
 - When changing a helper itself, validate in a fresh worktree so stale
  `__pycache__` or import state cannot mask a regression.
@@ -3,6 +3,7 @@ import inspect
 import pytest
 from src import ai_interaction
 from src.agent_tools import model_interaction_tools
 def _source(fn) -> str:
@@ -18,7 +19,8 @@ def test_model_resolver_applies_owner_filter():
 def test_model_listing_and_image_fallback_are_owner_scoped():
-    list_body = _source(ai_interaction.do_list_models)
+    # list_models moved to agent_tools.model_interaction_tools (#3629).
    list_body = _source(model_interaction_tools.list_models)
    image_body = _source(ai_interaction.do_generate_image)
    assert "owner: Optional[str] = None" in list_body
@@ -28,12 +30,13 @@ def test_model_listing_and_image_fallback_are_owner_scoped():
    assert "_resolve_model(model_spec, owner=owner)" in image_body
 # chat_with_model, list_models and ask_teacher moved to the registry (#3629)
 # and no longer route through dispatch_ai_tool; their owner threading is covered
 # by tests/test_model_interaction_registry.py. The remaining model-ish tools
 # still dispatched here:
@pytest.mark.parametrize("tool,content", [
    ("chat_with_model", "gpt-test\nhello"),
    ("pipeline", "gpt-test | summarize this"),
    ("list_models", ""),
    ("ui_control", "switch_model gpt-test"),
    ("ask_teacher", "gpt-test\nhelp me"),
 ])
 async def test_dispatch_passes_owner_to_model_tools(monkeypatch, tool, content):
    seen = {}
@@ -42,31 +45,16 @@ async def test_dispatch_passes_owner_to_model_tools(monkeypatch, tool, content):
        seen[name] = {"content": content, "session_id": session_id, "owner": owner}
        return {"ok": True}
    monkeypatch.setattr(
        ai_interaction,
        "do_chat_with_model",
        lambda content, session_id=None, owner=None: capture("chat_with_model", content, session_id, owner),
    )
    monkeypatch.setattr(
        ai_interaction,
        "do_pipeline",
        lambda content, session_id=None, owner=None: capture("pipeline", content, session_id, owner),
    )
    monkeypatch.setattr(
        ai_interaction,
        "do_list_models",
        lambda content, session_id=None, owner=None: capture("list_models", content, session_id, owner),
    )
    monkeypatch.setattr(
        ai_interaction,
        "do_ui_control",
        lambda content, session_id=None, owner=None: capture("ui_control", content, session_id, owner),
    )
    monkeypatch.setattr(
        ai_interaction,
        "do_ask_teacher",
        lambda content, session_id=None, owner=None: capture("ask_teacher", content, session_id, owner),
    )
    _desc, result = await ai_interaction.dispatch_ai_tool(tool, content, session_id="sid1", owner="alice")
@@ -0,0 +1,78 @@
 """Regression: api_call reaches the model for API-integration intent (#3794).
 The repro prompt — "Use the api_call tool to call Home Assistant GET
 /api/states" — matched no domain in ``_classify_agent_request``, so it was
 treated as low-signal. The agent loop then skipped retrieval and the function
 schema filter sent only the always-available tools (manage_memory / ask_user /
 update_plan); ``api_call`` was never advertised to the model even though the
 ToolIndex description existed. Adding the registry description alone did not fix
 runtime selection.
 These tests drive the real path the agent uses — classifier -> domain tool map
 (relevant tools) -> FUNCTION_TOOL_SCHEMAS filter — using the actual functions and
 constants, so they would fail on the pre-fix code (empty domains -> low-signal ->
 no api_call). They skip locally when the agent's heavy deps (httpx/embeddings)
 are absent, and run in CI where they are installed.
 """
 import pytest
 agent_loop = pytest.importorskip("src.agent_loop")
 REPRO = "Use the api_call tool to call Home Assistant GET /api/states"
 def _selected_tools(domains):
     """Collect the tools that ``_DOMAIN_TOOL_MAP`` associates with ``domains``.
     Mirrors agent_loop's deterministic domain seeding (the loop over
     `_intent['domains']` that updates `_relevant_tools`). Domains missing from
     the map contribute nothing.
     """
     domain_map = agent_loop._DOMAIN_TOOL_MAP
     picked = set()
     for name in domains:
         picked = picked | domain_map.get(name, set())
     return picked
 def _schema_names_sent(tools):
     """Return the schema names that survive the api-model schema filter.
     Walks ``agent_loop.FUNCTION_TOOL_SCHEMAS`` and keeps the ``function.name``
     of every schema whose name is a member of ``tools``.
     """
     advertised = set()
     for schema in agent_loop.FUNCTION_TOOL_SCHEMAS:
         name = schema.get("function", {}).get("name")
         if name in tools:
             advertised.add(name)
     return advertised
@pytest.mark.parametrize(
    "prompt",
    [
        REPRO,
        "check my home assistant lights",
        "fetch the latest unread from miniflux via the api_call tool",
        "call my gitea integration to list repos",
    ],
 )
 def test_integration_prompts_are_not_low_signal(prompt):
     """Each integration-style prompt is classified into the integrations domain."""
     classified = agent_loop._classify_agent_request([], prompt)
     # The classifier result is attached as the failure message for debugging.
     assert classified["low_signal"] is False, classified
     assert "integrations" in classified["domains"], classified
 def test_repro_selects_and_sends_api_call_schema():
     """The repro prompt selects ``api_call`` and its schema passes the filter."""
     domains = agent_loop._classify_agent_request([], REPRO)["domains"]
     chosen = _selected_tools(domains)
     assert "api_call" in chosen, chosen
     # Selection alone is not enough: the filtered schema list is what the
     # model actually receives.
     sent = _schema_names_sent(chosen)
     assert "api_call" in sent, "api_call schema must reach the model"
 def test_integrations_domain_has_a_rule_pack():
     """Selecting ``api_call`` yields at least one rule that mentions it.
     _domain_rules_for_tools indexes _DOMAIN_RULES[domain] directly, so a domain
     listed in _DOMAIN_TOOL_MAP but absent from _DOMAIN_RULES would raise
     KeyError as soon as api_call is selected.
     """
     rule_pack = agent_loop._domain_rules_for_tools({"api_call"})
     mentions_api_call = ["api_call" in rule for rule in rule_pack]
     assert any(mentions_api_call), rule_pack
 def test_plain_greeting_does_not_pull_api_call():
     """Over-matching guard: a greeting must not select the integration tool."""
     greeting = "hey there, how are you"
     classified = agent_loop._classify_agent_request([], greeting)
     domains = classified["domains"]
     # An unrelated message must neither land in the integrations domain nor
     # drag api_call into the selected tool set.
     assert "integrations" not in domains, classified
     assert "api_call" not in _selected_tools(domains)
@@ -219,6 +219,9 @@ class _WebhookManager:
     async def fire(self, event, payload):
         """Stub: accept an event/payload pair, do nothing, and return ``None``."""
         return None
     def fire_and_forget(self, event, payload):
         """Synchronous stub with the same arguments as ``fire``; returns ``None``."""
         return None
 def _install_sync_chat_stubs(monkeypatch):
    # FastAPI checks for python_multipart at import time when Form is used;
@@ -0,0 +1,272 @@
 """Tests for auth policy endpoint and password length validation."""
 import asyncio
 import importlib
 import sys
 import types
 from pathlib import Path
 from types import SimpleNamespace
 from unittest.mock import MagicMock
 import pytest
 from fastapi import HTTPException
 from tests.helpers.import_state import clear_module
 def _real_core_package():
     """Point the ``core`` package at the ``core/`` directory beside ``tests/``.
     Reuses the ``core`` entry in ``sys.modules`` when one exists; otherwise an
     empty module is registered under that name. ``clear_module("core.auth")``
     is then called (a test helper; presumably it evicts the cached module so
     the next import is fresh). Returns the ``core`` module object.
     """
     repo_root = Path(__file__).resolve().parents[1]
     package = sys.modules.get("core")
     if package is None:
         package = types.ModuleType("core")
         sys.modules["core"] = package
     package.__path__ = [str(repo_root / "core")]
     clear_module("core.auth")
     return package
 def _auth_module():
     """Reset the ``core`` package, then import and return ``core.auth``."""
     _real_core_package()
     module = importlib.import_module("core.auth")
     return module
 def _make_manager(tmp_path):
     """Build an ``AuthManager`` stored at ``tmp_path / "auth.json"``.
     Before construction the module's ``_hash_password`` and
     ``_verify_password`` are swapped for trivial ``hash:<password>`` stand-ins.
     """
     auth = _auth_module()
     def _fake_hash(password):
         return f"hash:{password}"
     def _fake_verify(password, hashed):
         return hashed == f"hash:{password}"
     auth._hash_password = _fake_hash
     auth._verify_password = _fake_verify
     return auth.AuthManager(str(tmp_path / "auth.json"))
 async def _immediate_to_thread(fn, *args, **kwargs):
    return fn(*args, **kwargs)
 # ── AuthManager.policy() ───────────────────────────────────────────────
 def test_policy_returns_password_min_length(tmp_path):
     """``policy()`` reports a minimum password length of 8."""
     reported = _make_manager(tmp_path).policy()
     assert reported["password_min_length"] == 8
 def test_policy_returns_reserved_usernames(tmp_path):
     """``policy()`` exposes the reserved usernames as a list."""
     reserved = _make_manager(tmp_path).policy()["reserved_usernames"]
     for name in ("internal-tool", "api", "demo", "system"):
         assert name in reserved
     assert isinstance(reserved, list)
 def test_policy_returns_signup_enabled(tmp_path):
     """A freshly built manager reports signup as disabled."""
     reported = _make_manager(tmp_path).policy()
     # False is the default for a manager that has not been reconfigured.
     assert reported["signup_enabled"] is False
 def test_policy_returns_session_days(tmp_path):
     """``policy()`` reports a session lifetime of 7 days."""
     reported = _make_manager(tmp_path).policy()
     assert reported["session_days"] == 7
 # ── GET /api/auth/policy endpoint ──────────────────────────────────────
 def _policy_endpoint(auth_manager):
     """Return the handler registered at ``/api/auth/policy``.
     ``routes.auth_routes`` is dropped from ``sys.modules`` and the ``core``
     package is reset before the router is built for ``auth_manager``.
     Raises ``AssertionError`` when no route has that path.
     """
     sys.modules.pop("routes.auth_routes", None)
     _real_core_package()
     from routes.auth_routes import setup_auth_routes
     routes = setup_auth_routes(auth_manager).routes
     match = next(
         (r for r in routes if getattr(r, "path", None) == "/api/auth/policy"),
         None,
     )
     if match is None:
         raise AssertionError("policy route not found")
     return match.endpoint
 def test_policy_endpoint_returns_dict(tmp_path):
     """The policy endpoint returns a dict carrying all four policy keys."""
     endpoint = _policy_endpoint(_make_manager(tmp_path))
     payload = asyncio.run(endpoint())
     assert isinstance(payload, dict)
     expected_keys = (
         "password_min_length",
         "reserved_usernames",
         "signup_enabled",
         "session_days",
     )
     for key in expected_keys:
         assert key in payload
 def test_policy_endpoint_values_match_manager(tmp_path):
     """The endpoint's response equals the manager's own ``policy()`` output."""
     manager = _make_manager(tmp_path)
     served = asyncio.run(_policy_endpoint(manager)())
     assert served == manager.policy()
 # ── Password length validation ─────────────────────────────────────────
 def _setup_endpoint(auth_manager):
     """Return ``(handler, SetupRequest)`` for the ``/api/auth/setup`` route.
     ``routes.auth_routes`` is re-imported after resetting the ``core`` package.
     Raises ``AssertionError`` when the router has no such path.
     """
     sys.modules.pop("routes.auth_routes", None)
     _real_core_package()
     from routes.auth_routes import SetupRequest, setup_auth_routes
     routes = setup_auth_routes(auth_manager).routes
     match = next(
         (r for r in routes if getattr(r, "path", None) == "/api/auth/setup"),
         None,
     )
     if match is None:
         raise AssertionError("setup route not found")
     return match.endpoint, SetupRequest
 def _signup_endpoint(auth_manager):
     """Return ``(handler, SignupRequest)`` for the ``/api/auth/signup`` route.
     ``routes.auth_routes`` is re-imported after resetting the ``core`` package.
     Raises ``AssertionError`` when the router has no such path.
     """
     sys.modules.pop("routes.auth_routes", None)
     _real_core_package()
     from routes.auth_routes import SignupRequest, setup_auth_routes
     routes = setup_auth_routes(auth_manager).routes
     match = next(
         (r for r in routes if getattr(r, "path", None) == "/api/auth/signup"),
         None,
     )
     if match is None:
         raise AssertionError("signup route not found")
     return match.endpoint, SignupRequest
 def _change_password_endpoint(auth_manager):
     """Return ``(handler, ChangePasswordRequest)`` for ``/api/auth/change-password``.
     ``routes.auth_routes`` is re-imported after resetting the ``core`` package.
     Raises ``AssertionError`` when the router has no such path.
     """
     sys.modules.pop("routes.auth_routes", None)
     _real_core_package()
     from routes.auth_routes import ChangePasswordRequest, setup_auth_routes
     routes = setup_auth_routes(auth_manager).routes
     match = next(
         (r for r in routes if getattr(r, "path", None) == "/api/auth/change-password"),
         None,
     )
     if match is None:
         raise AssertionError("change-password route not found")
     return match.endpoint, ChangePasswordRequest
 def test_setup_rejects_short_password(tmp_path):
     """Setup with a 5-character password fails with HTTP 400."""
     endpoint, request_model = _setup_endpoint(_make_manager(tmp_path))
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     payload = request_model(username="admin", password="short")
     with pytest.raises(HTTPException) as raised:
         asyncio.run(endpoint(body=payload, request=fake_request))
     error = raised.value
     assert error.status_code == 400
     assert "8 characters" in error.detail
 def test_signup_rejects_short_password(tmp_path):
     """Signup with a 5-character password fails with HTTP 400."""
     manager = _make_manager(tmp_path)
     # An admin exists and signup is switched on before the route is built.
     manager.create_user("admin", "admin-password", is_admin=True)
     manager.signup_enabled = True
     endpoint, request_model = _signup_endpoint(manager)
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     payload = request_model(username="newuser", password="short")
     with pytest.raises(HTTPException) as raised:
         asyncio.run(endpoint(body=payload, request=fake_request))
     error = raised.value
     assert error.status_code == 400
     assert "8 characters" in error.detail
 def test_change_password_rejects_short_password(tmp_path):
     """Changing to a 5-character password fails with HTTP 400."""
     manager = _make_manager(tmp_path)
     manager.create_user("alice", "old-password", is_admin=False)
     endpoint, request_model = _change_password_endpoint(manager)
     client = SimpleNamespace(host="127.0.0.1")
     fake_request = SimpleNamespace(
         cookies={"odysseus_session": "current-token"},
         client=client,
     )
     # Make the token lookup resolve to alice regardless of the cookie value.
     manager.get_username_for_token = MagicMock(return_value="alice")
     payload = request_model(current_password="old-password", new_password="short")
     with pytest.raises(HTTPException) as raised:
         asyncio.run(endpoint(body=payload, request=fake_request))
     error = raised.value
     assert error.status_code == 400
     assert "8 characters" in error.detail
 def test_setup_accepts_exactly_min_length_password(tmp_path):
     """Boundary case: an 8-character password is accepted by setup."""
     endpoint, request_model = _setup_endpoint(_make_manager(tmp_path))
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     payload = request_model(username="admin", password="12345678")
     outcome = asyncio.run(endpoint(body=payload, request=fake_request))
     assert outcome == {"ok": True, "message": "Admin account created"}
 def test_setup_rejects_seven_char_password(tmp_path):
     """Boundary case: a 7-character password is rejected with HTTP 400."""
     endpoint, request_model = _setup_endpoint(_make_manager(tmp_path))
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     payload = request_model(username="admin", password="1234567")
     with pytest.raises(HTTPException) as raised:
         asyncio.run(endpoint(body=payload, request=fake_request))
     assert raised.value.status_code == 400
 # ── Login "remember me" cookie lifetime ────────────────────────────────
 class _CapturingResponse:
    """Stand-in for fastapi.Response that records set_cookie kwargs."""
    def __init__(self):
        self.cookie_kwargs = None
    def set_cookie(self, **kwargs):
        self.cookie_kwargs = kwargs
 def _login_endpoint(auth_manager):
     """Return ``(handler, LoginRequest)`` for the ``/api/auth/login`` route.
     ``routes.auth_routes`` is re-imported after resetting the ``core`` package.
     Raises ``AssertionError`` when the router has no such path.
     """
     sys.modules.pop("routes.auth_routes", None)
     _real_core_package()
     from routes.auth_routes import LoginRequest, setup_auth_routes
     routes = setup_auth_routes(auth_manager).routes
     match = next(
         (r for r in routes if getattr(r, "path", None) == "/api/auth/login"),
         None,
     )
     if match is None:
         raise AssertionError("login route not found")
     return match.endpoint, LoginRequest
 def test_remember_cookie_max_age_matches_token_ttl(tmp_path):
     """With ``remember=True`` the cookie's ``max_age`` equals ``TOKEN_TTL``."""
     auth_mod = _auth_module()
     manager = _make_manager(tmp_path)
     manager.create_user("alice", "alice-password", is_admin=False)
     endpoint, request_model = _login_endpoint(manager)
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     captured = _CapturingResponse()
     payload = request_model(username="alice", password="alice-password", remember=True)
     outcome = asyncio.run(
         endpoint(body=payload, request=fake_request, response=captured)
     )
     assert outcome == {"ok": True, "username": "alice"}
     # The persistent cookie must last exactly as long as the token.
     assert captured.cookie_kwargs["max_age"] == auth_mod.TOKEN_TTL
 def test_no_remember_omits_cookie_max_age(tmp_path):
     """With ``remember=False`` the login cookie is set without ``max_age``."""
     manager = _make_manager(tmp_path)
     manager.create_user("bob", "bob-password", is_admin=False)
     endpoint, request_model = _login_endpoint(manager)
     fake_request = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
     captured = _CapturingResponse()
     payload = request_model(username="bob", password="bob-password", remember=False)
     asyncio.run(endpoint(body=payload, request=fake_request, response=captured))
     # No max_age means the browser treats it as a session cookie.
     assert "max_age" not in captured.cookie_kwargs
@@ -80,6 +80,16 @@ def test_password_change_allows_new_password_and_blocks_old_password(tmp_path):
    assert mgr.create_session("alice", "new-password") is not None
 def test_create_session_trusted_rejects_username_renamed_after_verification(tmp_path):
    """A trusted session for the old username is refused once the user was renamed."""
    manager = _make_manager(tmp_path)
    assert manager.create_user("admin", "admin-password", is_admin=True)
    # NOTE(review): "alice"/"old-password" is presumably seeded by _make_manager — confirm.
    password_ok = manager.verify_password("alice", "old-password")
    assert password_ok is True
    # The rename happens between password verification and session creation.
    renamed = manager.rename_user("alice", "alice2", "admin")
    assert renamed is True
    stale_session = manager.create_session_trusted("alice")
    assert stale_session is None
 def _change_password_endpoint(auth_manager):
    sys.modules.pop("routes.auth_routes", None)
    _real_core_package()
@@ -92,6 +102,39 @@ def _change_password_endpoint(auth_manager):
    raise AssertionError("change-password route not found")
 def _login_endpoint(auth_manager):
    """Return ``(endpoint, LoginRequest)`` for the "/api/auth/login" route.

    The auth routes module is dropped from ``sys.modules`` and imported again
    before the router is built from ``auth_manager``. Raises AssertionError
    when the router has no login route.
    """
    sys.modules.pop("routes.auth_routes", None)
    _real_core_package()  # NOTE(review): defined elsewhere in the file; purpose not visible here
    from routes.auth_routes import LoginRequest, setup_auth_routes
    wanted_path = "/api/auth/login"
    for candidate in setup_auth_routes(auth_manager).routes:
        if getattr(candidate, "path", None) == wanted_path:
            break
    else:
        # Loop finished without a match.
        raise AssertionError("login route not found")
    return candidate.endpoint, LoginRequest
 def test_login_route_does_not_set_cookie_when_trusted_session_rejects_stale_user(monkeypatch):
    """Login answers 401 and sets no cookie when create_session_trusted returns None."""
    fake_auth = MagicMock()
    fake_auth.verify_password.return_value = True
    fake_auth.totp_enabled.return_value = False
    # The password check passes, but no session token is issued.
    fake_auth.create_session_trusted.return_value = None
    login, LoginRequest = _login_endpoint(fake_auth)
    def _inline_to_thread(fn, *args, **kwargs):
        # Forward to the file's _immediate_to_thread helper in place of asyncio.to_thread.
        return _immediate_to_thread(fn, *args, **kwargs)
    monkeypatch.setattr("routes.auth_routes.asyncio.to_thread", _inline_to_thread)
    caller = SimpleNamespace(client=SimpleNamespace(host="127.0.0.1"))
    fake_response = MagicMock()
    credentials = LoginRequest(username="alice", password="old-password")
    with pytest.raises(HTTPException) as caught:
        asyncio.run(login(body=credentials, request=caller, response=fake_response))
    assert caught.value.status_code == 401
    fake_response.set_cookie.assert_not_called()
 def test_change_password_route_revokes_other_sessions_after_success(monkeypatch):
    auth = MagicMock()
    auth.get_username_for_token.return_value = "alice"
@@ -0,0 +1,174 @@
 """Tests for bg_jobs.kill and the manage_bg_jobs agent tool.
 Process-free: the store/dir are redirected to tmp, _pid_alive is forced True so
 seeded "running" jobs stay running through refresh(), and _kill is stubbed so no
 real signal is sent. Jobs are scoped to a chat (session_id), which is the main
 invariant under test.
 """
 import asyncio
 import json
 import time
 import pytest
 from src import bg_jobs
 from src.agent_tools.bg_job_tools import ManageBgJobsTool
@pytest.fixture
 def store(tmp_path, monkeypatch):
    jobs_dir = tmp_path / "bg_jobs"
    jobs_dir.mkdir()
    monkeypatch.setattr(bg_jobs, "_STORE", tmp_path / "bg_jobs.json")
    monkeypatch.setattr(bg_jobs, "_JOBS_DIR", jobs_dir)
    monkeypatch.setattr(bg_jobs, "_pid_alive", lambda pid: True)
    killed: list = []
    monkeypatch.setattr(bg_jobs, "_kill", lambda pid: killed.append(pid))
    return {"dir": jobs_dir, "killed": killed}
 def _seed(session_id="sess-a", status="running", job_id="job0001", output="", pid=4321):
    """Insert one job record directly into the bg_jobs store and return it.

    Args:
        session_id: chat the job belongs to.
        status: job status; anything other than "running" also gets an
            ``ended_at`` timestamp and ``exit_code`` 0.
        job_id: key of the record and stem of its log/exit file names.
        output: when non-empty, written to the job's log file.
        pid: process id stored on the record.
    """
    still_running = status == "running"
    log_file = bg_jobs._JOBS_DIR / f"{job_id}.log"
    exit_file = bg_jobs._JOBS_DIR / f"{job_id}.exit"
    record = dict(
        id=job_id,
        session_id=session_id,
        command="sleep 60",
        status=status,
        pid=pid,
        started_at=time.time(),
        ended_at=None if still_running else time.time(),
        exit_code=None if still_running else 0,
        max_runtime_s=3600,
        followed_up=False,
        log_path=str(log_file),
        exit_path=str(exit_file),
    )
    if output:
        log_file.write_text(output, encoding="utf-8")
    # Read-modify-write through the module's own persistence helpers.
    all_jobs = bg_jobs._load()
    all_jobs[job_id] = record
    bg_jobs._save(all_jobs)
    return record
 def _run(args, session_id="sess-a"):
    """Run ManageBgJobsTool once with JSON-encoded ``args`` and return its result.

    The tool context carries the given ``session_id`` and ``owner`` None.
    """
    tool = ManageBgJobsTool()
    encoded_args = json.dumps(args)
    context = {"session_id": session_id, "owner": None}
    return asyncio.run(tool.execute(encoded_args, context))
 # ── bg_jobs.kill ────────────────────────────────────────────────────────────
 def test_kill_marks_killed_and_suppresses_followup(store):
    """kill() on a running job marks it failed/killed and signals its pid once."""
    _seed(job_id="job0001", pid=4321)
    killed_rec = bg_jobs.kill("job0001")
    assert (killed_rec["status"], killed_rec["exit_code"]) == ("failed", -1)
    assert killed_rec["killed"] is True
    # followed_up True so the monitor won't ALSO auto-continue a deliberate kill.
    assert killed_rec["followed_up"] is True
    signalled = store["killed"]
    assert signalled == [4321]
 def test_kill_unknown_job_returns_none(store):
    """kill() returns None for a job id that is not in the store."""
    missing = bg_jobs.kill("nope")
    assert missing is None
 def test_kill_finished_job_is_noop(store):
    """kill() leaves a finished job's status alone and sends no signal."""
    _seed(job_id="done01", status="done")
    finished = bg_jobs.kill("done01")
    assert finished["status"] == "done"
    # no signal sent to an already-finished job
    signalled = store["killed"]
    assert signalled == []
 def test_result_text_reports_killed(store):
    """result_text() for a killed job mentions "killed" (case-insensitive)."""
    # The seeded record itself is not needed; the job is re-read after the kill.
    _seed(job_id="job0001")
    bg_jobs.kill("job0001")
    summary = bg_jobs.result_text(bg_jobs.get("job0001"))
    assert "killed" in summary.lower()
 # ── manage_bg_jobs tool ─────────────────────────────────────────────────────
 def test_no_session_is_rejected(store):
    """The tool returns an error when the context has no session_id."""
    tool = ManageBgJobsTool()
    sessionless_context = {"session_id": None}
    result = asyncio.run(tool.execute('{"action":"list"}', sessionless_context))
    assert "error" in result
 def test_list_empty(store):
    """Listing with nothing seeded reports that there are no background jobs."""
    listing = _run({"action": "list"})["output"]
    assert "No background jobs" in listing
 def test_list_scoped_to_session(store):
    """The list action shows only jobs that belong to the calling session."""
    for owner_session, seeded_id in (("sess-a", "aaaa"), ("sess-b", "bbbb")):
        _seed(session_id=owner_session, job_id=seeded_id)
    listing = _run({"action": "list"}, session_id="sess-a")["output"]
    assert "aaaa" in listing
    assert "bbbb" not in listing
 def test_output_returns_captured_log(store):
    """The output action returns the text seeded into the job's log file."""
    _seed(job_id="job0001", output="hello from the job\n")
    request = {"action": "output", "job_id": "job0001"}
    log_text = _run(request)["output"]
    assert "hello from the job" in log_text
 def test_output_cross_session_denied(store):
    """Reading another session's job output yields an error and leaks nothing."""
    _seed(session_id="sess-a", job_id="job0001", output="secret")
    out = _run({"action": "output", "job_id": "job0001"}, session_id="sess-b")
    assert "error" in out
    # Check the whole result, not just the error message: the other chat's
    # log text must not come back under any key (e.g. alongside the error).
    assert "secret" not in str(out)
 def test_kill_via_tool(store):
    """The kill action reports "Killed", signals the job's pid and flags the record."""
    _seed(job_id="job0001", pid=999)
    result = _run({"action": "kill", "job_id": "job0001"})
    assert "Killed" in result["output"]
    signalled = store["killed"]
    assert signalled == [999]
    stored_record = bg_jobs.get("job0001")
    assert stored_record["killed"] is True
 def test_kill_cross_session_denied(store):
    """A kill request from a different session errors out and sends no signal."""
    _seed(session_id="sess-a", job_id="job0001")
    request = {"action": "kill", "job_id": "job0001"}
    result = _run(request, session_id="sess-b")
    assert "error" in result
    # never touched another chat's job
    signalled = store["killed"]
    assert signalled == []
 def test_kill_requires_job_id(store):
    """The kill action without a job_id returns an error."""
    result = _run({"action": "kill"})
    assert "error" in result
 def test_unknown_action(store):
    """An unrecognised action name returns an error."""
    result = _run({"action": "frobnicate"})
    assert "error" in result
 def test_action_aliases(store):
    """'read', 'jobs' and 'stop' behave like output, list and kill respectively."""
    _seed(job_id="job0001", output="aliased")
    # 'read' aliases to output, 'jobs' to list, 'stop' to kill
    # The stop call runs last because it ends the job.
    read_text = _run({"action": "read", "job_id": "job0001"})["output"]
    assert "aliased" in read_text
    jobs_text = _run({"action": "jobs"})["output"]
    assert "job0001" in jobs_text
    stop_text = _run({"action": "stop", "job_id": "job0001"})["output"]
    assert "Killed" in stop_text
 # ── intent classifier: short bg-job commands must not be dropped as low-signal ─
 # A short imperative ("kill that job") otherwise trips the low-signal gate, which
 # skips tool retrieval entirely and never surfaces manage_bg_jobs (the live bug
 # this feature hit). These lock in that bg-job control reaches the files domain.
@pytest.mark.parametrize("msg", [
    "stop the job",
    "kill that job",
    "Now kill that background job.",
    "is the job done?",
    "check the job output",
    "list my jobs",
    "kill the bg task",
 ])
 def test_bg_job_commands_are_not_low_signal(msg):
    from src.agent_loop import _classify_agent_request, _DOMAIN_TOOL_MAP
    r = _classify_agent_request([{"role": "user", "content": msg}], msg)
    assert r["low_signal"] is False
    assert "files" in r["domains"]
    # files domain seeds manage_bg_jobs, so it gets offered to the model.
    assert "manage_bg_jobs" in _DOMAIN_TOOL_MAP["files"]
@pytest.mark.parametrize("msg", [
    "run this in the background",   # launching, not managing
    "find me a job listing",        # unrelated use of "job"
 ])
 def test_non_bg_messages_do_not_trip_files_domain(msg):
    from src.agent_loop import _classify_agent_request
    r = _classify_agent_request([{"role": "user", "content": msg}], msg)
    assert "files" not in r["domains"]
@@ -1,5 +1,6 @@
 """Regression tests for owner-scoped model resolution in scheduled actions."""
 import sqlite3
 from datetime import datetime
 from types import SimpleNamespace
@@ -138,6 +139,108 @@ async def test_learn_sender_signatures_resolves_llm_for_task_owner(monkeypatch):
    assert imap_owners == ["alice"]
@pytest.mark.asyncio
 async def test_learn_sender_signatures_writes_owner_scoped_cache(monkeypatch, tmp_path):
    """Learning signatures for "alice" must not overwrite "bob"'s cached row.

    A ``sender_signatures`` row owned by "bob" is seeded for
    writer@example.com, the action is run for "alice" against a fake IMAP
    connection and a fake LLM, and the table is then expected to hold one row
    per owner for that address.
    """
    from routes import email_helpers
    from src import endpoint_resolver, llm_core
    from src.builtin_actions import action_learn_sender_signatures
    # Use a throwaway database under tmp_path instead of the real one.
    db_path = tmp_path / "scheduled_emails.db"
    monkeypatch.setattr(email_helpers, "SCHEDULED_DB", db_path)
    email_helpers._init_scheduled_db()
    # Seed bob's pre-existing cache entry for the same sender address.
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            """
            INSERT INTO sender_signatures
            (from_address, owner, signature_text, sample_count, last_built_at, model_used, source)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                "writer@example.com",
                "bob",
                "bob cached signature",
                3,
                "2999-01-01T00:00:00",
                "old-model",
                "llm",
            ),
        )
        conn.commit()
    finally:
        conn.close()
    class FakeImap:
        # In-memory IMAP stand-in: three message ids, all from the same sender.
        def select(self, *_args, **_kwargs):
            return "OK", []
        def search(self, *_args, **_kwargs):
            return "OK", [b"1 2 3"]
        def fetch(self, uid, query):
            # Header-only fetches return just the From line.
            if "HEADER.FIELDS" in query:
                return "OK", [(None, b"From: Writer <writer@example.com>\r\n\r\n")]
            # Any other fetch returns a body ending in a signature block; the
            # uid is appended so each message body differs.
            return "OK", [
                (
                    None,
                    (
                        b"Thanks for the update.\r\n\r\n"
                        b"Regards,\r\n"
                        b"Writer Example\r\n"
                        b"Example Co.\r\n"
                        + str(uid).encode()
                    ),
                )
            ]
        def logout(self):
            return None
    # Records the owner passed to every IMAP connection attempt.
    imap_owners = []
    def fake_imap_connect(_account_id=None, owner=""):
        imap_owners.append(owner)
        return FakeImap()
    monkeypatch.setattr(email_helpers, "_imap_connect", fake_imap_connect)
    # Endpoint resolution always yields the model name "alice-model".
    monkeypatch.setattr(
        endpoint_resolver,
        "resolve_endpoint",
        lambda kind, *args, **kwargs: ("http://llm", "alice-model", {}),
    )
    async def fake_llm_call_async(**_kwargs):
        # Canned LLM reply: the extracted signature text.
        return "Writer Example\nExample Co.\nwriter@example.com"
    monkeypatch.setattr(llm_core, "llm_call_async", fake_llm_call_async)
    message, ok = await action_learn_sender_signatures("alice")
    assert ok is True
    assert message.startswith("Learned sigs: 1 found")
    # Two IMAP connections were opened, both on behalf of alice.
    assert imap_owners == ["alice", "alice"]
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(
            """
            SELECT owner, signature_text, model_used
            FROM sender_signatures
            WHERE from_address = ?
            ORDER BY owner
            """,
            ("writer@example.com",),
        ).fetchall()
    finally:
        conn.close()
    # alice gets her own row; bob's seeded row is unchanged.
    assert rows == [
        ("alice", "Writer Example\nExample Co.\nwriter@example.com", "alice-model"),
        ("bob", "bob cached signature", "old-model"),
    ]
@pytest.mark.asyncio
 async def test_check_email_urgency_resolves_llm_candidates_for_task_owner(monkeypatch, tmp_path):
    from core import database
@@ -38,6 +38,16 @@ def test_unknown_public_host_gets_no_affinity_fields(monkeypatch):
    assert payload == {}
@pytest.mark.parametrize("url", [
    "https://10.example-cloud.com/v1",
    "https://172.16.example-cloud.com/v1",
    "https://192.168.example-cloud.com/v1",
 ])
 def test_private_prefix_dns_host_gets_no_affinity_fields(monkeypatch, url):
    payload = _affinity_fields(url, monkeypatch)
    assert payload == {}
 def test_localhost_server_gets_affinity_fields(monkeypatch):
    """A localhost endpoint receives the session_id and cache_prompt affinity fields."""
    expected_fields = {"session_id": "sess-123", "cache_prompt": True}
    affinity = _affinity_fields("http://localhost:8080/v1", monkeypatch)
    assert affinity == expected_fields
--- a/Show More
+++ b/Show More