fix(logging): avoid logging raw URLs in llm_core error paths

Drop the raw url/base_chat_url from the Ollama-detection and model-list-fetch warning logs added by this sweep, since these values can contain private hostnames, internal IPs, credentials, or other deployment details.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
log: pass exception via exc_info instead of string interpolation
2026-06-16 01:35:36 -04:00 · 2026-06-15 17:31:43 +01:00 · 2026-06-15 17:31:43 +01:00 · 2026-06-15 17:31:43 +01:00 · 2026-06-15 17:31:43 +01:00 · 2026-06-15 17:31:42 +01:00
565 changed files with 62416 additions and 8585 deletions
@@ -10,6 +10,12 @@ dist/
 build/
 .env
 .env.bak.*
+# Secrets: keep plaintext and every transient secrets.env variant out of
+# the build context. If an encrypted secrets.env is used, it is mounted
+# at runtime — never baked into the image. Mirrored in .gitignore.
+secrets.env
+secrets.env.*
+!secrets.env.example
 /data/
 /logs/
 .git/
@@ -56,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080
 # SQLite database path (default: sqlite:///./data/app.db)
 # DATABASE_URL=sqlite:///./data/app.db

+# ============================================================
+# Data directory
+# ============================================================
+# Move everything that lives under data/ - settings, sessions, database, auth,
+# cache, uploads, etc. - to another path:
+# ODYSSEUS_DATA_DIR=C:\path\to\dir
+
 # ============================================================
 # Auth & Security
 # ============================================================
@@ -112,6 +119,9 @@ SEARXNG_INSTANCE=http://localhost:8080
 # Default: http://{LLM_HOST}:11434/v1/embeddings (ollama)
 # EMBEDDING_URL=http://localhost:11434/v1/embeddings

+# Embedding API key (only needed if the embedding endpoint requires one)
+# EMBEDDING_API_KEY=embedding_api_key_here
+
 # Embedding model name (must be available at the endpoint above)
 # EMBEDDING_MODEL=all-minilm:l6-v2

@@ -144,6 +154,21 @@ SEARXNG_INSTANCE=http://localhost:8080
 # if you intentionally want scheduled scripts to run remotely.
 # ODYSSEUS_SCRIPT_HOST=localhost

+# Chat / agent attachment size cap in bytes (default: 10 MB).
+# Raise this for local installs that need larger PDFs or text documents.
+# Example: 52428800 = 50 MB.
+# ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=10485760
+
+# Other per-feature upload size caps in bytes. All are validated and optional;
+# defaults shown. An invalid value (non-integer or < 1) fails fast at startup.
+# ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=104857600            # gallery image upload (100 MB)
+# ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=26214400   # gallery transform input (25 MB)
+# ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=10485760              # memory import file (10 MB)
+# ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=26214400            # personal document upload (25 MB)
+# ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=26214400       # email compose attachment (25 MB)
+# ODYSSEUS_STT_MAX_AUDIO_BYTES=26214400                  # speech-to-text audio (25 MB)
+# ODYSSEUS_ICS_MAX_BYTES=10485760                        # calendar .ics import (10 MB)
+
 # ============================================================
 # GPU support (Docker Compose)
 # ============================================================
@@ -165,3 +190,10 @@ SEARXNG_INSTANCE=http://localhost:8080
 # These overlays only expose the GPU devices. The slim Odysseus image
 # still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,
 # llama-cpp-python, etc.) before models can actually serve on GPU.
+
+# ============================================================
+# Storage Paths (Docker Compose)
+# ============================================================
+
+# APP_DATA_DIR=./data
+# APP_LOGS_DIR=./logs
@@ -0,0 +1,9 @@
+# Code owners.
+#
+# Intentionally empty for now. The catch-all rule that mapped every path to a
+# single owner froze all merges the moment "Require review from Code Owners"
+# was enabled, because no other maintainer's approval could satisfy the gate.
+# A per-area ownership map (security/auth, CI, frontend, agent internals, with
+# multiple named owners per line) is being worked out in issue #593; once
+# agreed it replaces this file. Until then, required reviews and the security
+# CI gate (docs/security-ci.md) remain in force via branch protection.
@@ -23,7 +23,7 @@ body:
          required: true
        - label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).)
          required: true
-        - label: I am running the latest code from `main`.
+        - label: I am running the latest code from the `dev` branch (the default branch you get on clone, where fixes land first) and the bug still reproduces there. Please `git pull` the latest `dev` before filing.
          required: true

  - type: dropdown
@@ -0,0 +1,48 @@
+# Dependabot keeps dependencies and pinned action versions current.
+#
+# Why this matters for security: every workflow in this repo pins its GitHub
+# Actions to an exact commit (a SHA), which is safe but freezes them in time.
+# Dependabot opens a small, reviewable pull request whenever a newer version
+# exists -- for Python packages, npm packages, the Docker base image, and the
+# pinned Actions themselves -- so staying patched does not require manual work.
+# Updates are grouped so a week's bumps arrive as one PR per ecosystem, not a
+# flood of separate ones.
+
+version: 2
+updates:
+  # Python dependencies (requirements.txt + requirements-optional.txt).
+  - package-ecosystem: pip
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    groups:
+      python:
+        patterns: ["*"]
+
+  # Frontend / tooling npm packages (package.json).
+  - package-ecosystem: npm
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    groups:
+      npm:
+        patterns: ["*"]
+
+  # The pinned action SHAs used across .github/workflows.
+  - package-ecosystem: github-actions
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
+    groups:
+      actions:
+        patterns: ["*"]
+
+  # The Docker base image in the Dockerfile.
+  - package-ecosystem: docker
+    directory: "/"
+    schedule:
+      interval: weekly
+    open-pull-requests-limit: 5
@@ -0,0 +1,123 @@
+# Pull Request Review Template
+
+Use this shape as a copyable reference for substantive PR reviews; GitHub does
+not auto-apply this file to review comments. Omit sections that do not add
+useful signal. Lead with confirmed findings; keep speculative notes out of the
+public review unless they are framed as a concrete open question.
+
+## Small PR Path
+
+For narrow docs, typo, test-only, or obvious local fixes, a short review is
+enough:
+
+```md
+LGTM after checking:
+- scope:
+- validation:
+- residual risk:
+```
+
+Use the fuller structure below for larger, risky, multi-finding, or
+security-sensitive reviews.
+
+## Findings
+
+**<sub><sub>![P2 Badge](https://img.shields.io/badge/P2-yellow?style=flat)</sub></sub> issue (test): Short issue title**
+
+- **Problem:** Concrete broken flow, contract, input, or risk.
+
+- **Impact:** Why this matters to users, CI, maintainers, data, security, or scale.
+
+- **Ask:** Smallest practical correction or decision the author should make.
+
+- **Location:** `path:line`
+
+## Open Questions
+
+- **question (scope, non-blocking): Short author question** Ask the concrete
+  intent, scope, or tradeoff question.
+
+## Validation
+
+- Ran:
+- Not run:
+- Residual risk:
+
+## PR Hygiene
+
+- Target/template/checks:
+- Related, duplicate, or superseding context:
+
+## No Findings Variant
+
+```md
+## Findings
+
+none confirmed
+
+## Validation
+
+- Ran:
+- Not run:
+- Residual risk:
+```
+
+## Legend
+
+- **Findings:** Verified, author-actionable issues that should be fixed or
+  consciously accepted before merge.
+- **Priority badges:** The shields.io badges below are optional formatting for
+  priority labels. Plain `P0`, `P1`, `P2`, or `P3` text is also acceptable when
+  an external image dependency is undesirable or may not render.
+  - **P0:** `![P0 Badge](https://img.shields.io/badge/P0-red?style=flat)` -
+    release-blocking or actively dangerous.
+  - **P1:** `![P1 Badge](https://img.shields.io/badge/P1-orange?style=flat)` -
+    serious bug, security risk, data-loss risk, or broken primary flow.
+  - **P2:** `![P2 Badge](https://img.shields.io/badge/P2-yellow?style=flat)` -
+    meaningful correctness, test, maintainability, or edge-case issue.
+  - **P3:** `![P3 Badge](https://img.shields.io/badge/P3-lightgrey?style=flat)` -
+    minor polish or low-risk cleanup.
+- **Intent labels:**
+  - **`issue`:** A confirmed defect, regression, broken contract, or concrete
+    risk.
+  - **`suggestion`:** A non-blocking improvement that would make the PR clearer,
+    safer, or easier to maintain.
+  - **`nit`:** A tiny, non-blocking cleanup or style note. Use it only when the
+    author can safely ignore it without changing the review outcome.
+  - **`question`:** A real author-facing clarification about intent, scope, or
+    tradeoffs. Do not use questions to hide an issue that should be stated
+    directly.
+  - **`LGTM`:** "Looks good to me." Use only when the review found no blocking
+    issues, or when any remaining notes are clearly optional.
+- **Decorations:** Optional labels in parentheses that clarify the finding type,
+  scope, or merge impact.
+  - **`security`:** Auth, authorization, ownership, secrets, SSRF, injection,
+    unsafe external input, or other trust-boundary concerns.
+  - **`test`:** Missing, failing, misleading, brittle, or insufficient tests.
+  - **`scope`:** PR scope, feature boundaries, unrelated churn, or work that
+    should be split into a separate issue or PR.
+  - **`ci`:** CI configuration, workflow failures, flaky checks, or validation
+    signal quality.
+  - **`api`:** Route, request/response, public function, schema, persistence, or
+    integration contract changes.
+  - **`docs`:** User-facing docs, contributor docs, examples, or comments that
+    need to change with the code.
+  - **`non-blocking`:** Useful feedback that should not prevent merge by
+    itself.
+- **Finding fields:**
+  - **Problem:** What is wrong, what contract is ambiguous, or what risk the PR
+    introduces.
+  - **Impact:** Why the problem matters in practical terms.
+  - **Ask:** The smallest concrete fix, test, or decision requested from the PR
+    author.
+  - **Location:** The most useful repo-relative file and line reference for the
+    finding, using `path:line`.
+- **Optional sections:**
+  - **Open Questions:** Genuine scope or intent questions; omit when there are
+    no real questions.
+  - **Validation:** What the reviewer ran, what was intentionally not run, and
+    what risk remains after review.
+  - **PR Hygiene:** Target-branch, template, CI/check, duplicate, related-work,
+    or superseding-PR notes.
+- **`none confirmed`:** Use only when no review-worthy findings were confirmed;
+  still list validation gaps or residual risk when relevant.
@@ -103,14 +103,21 @@ module.exports = async ({ github, context, core }) => {

  async function swapLabel(num, add, remove) {
    if (await labelExists(add)) {
-      await github.rest.issues.addLabels({ owner, repo, issue_number: num, labels: [add] });
+      try {
+        await github.rest.issues.addLabels({ owner, repo, issue_number: num, labels: [add] });
+      } catch (e) {
+        // Fail soft on a token that can't write labels so a label permission
+        // problem never masks the actual description verdict.
+        if (e.status !== 403) throw e;
+        core.warning(`Could not add "${add}" — token lacks label write here; skipping.`);
+      }
    } else {
      core.warning(`Label "${add}" does not exist in the repo — skipping. Create it once to enable labelling.`);
    }
    try {
      await github.rest.issues.removeLabel({ owner, repo, issue_number: num, name: remove });
    } catch (e) {
-      if (e.status !== 404 && e.status !== 410) throw e;
+      if (e.status !== 404 && e.status !== 410 && e.status !== 403) throw e;
    }
  }

@@ -19,8 +19,10 @@ jobs:
    name: Python syntax (compileall)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: "3.11"
      # Byte-compile sources — catches syntax errors without installing deps.
@@ -30,8 +32,10 @@ jobs:
    name: JS syntax (node --check)
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020  # v4
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+      - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e  # v6.4.0
        with:
          node-version: "20"
      # Syntax-check our own JS (skip vendored libs in static/lib).
@@ -50,11 +54,41 @@ jobs:
    # ROADMAP "fresh install smoke tests" item; make this required once green.
    continue-on-error: true
    steps:
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4
-      - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      # Detect whether this change (PR or push) only touches documentation files.
+      # If so, skip the expensive pytest run while still reporting a passing check.
+      - name: Check for docs-only changes
+        id: docs-check
+        run: |
+          if [ "${{ github.event_name }}" = "pull_request" ]; then
+            BASE="${{ github.event.pull_request.base.sha }}"
+            HEAD="${{ github.event.pull_request.head.sha }}"
+          else
+            BASE="${{ github.event.before }}"
+            HEAD="${{ github.sha }}"
+          fi
+          # List all changed files; if every file matches docs/markdown patterns, skip pytest.
+          changed=$(git diff --name-only "$BASE" "$HEAD" 2>/dev/null || git diff --name-only HEAD~1 HEAD)
+          non_docs=$(echo "$changed" | grep -Ev '^(docs/|.*\.md$|\.github/[^/]+\.md$)' || true)
+          if [ -z "$non_docs" ]; then
+            echo "docs_only=true" >> "$GITHUB_OUTPUT"
+            echo "Docs-only change detected — skipping pytest."
+          else
+            echo "docs_only=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        if: steps.docs-check.outputs.docs_only != 'true'
        with:
          python-version: "3.11"
          cache: pip
      - run: pip install -r requirements.txt
+        if: steps.docs-check.outputs.docs_only != 'true'
      - run: mkdir -p data  # sqlite DB lives at ./data/app.db
+        if: steps.docs-check.outputs.docs_only != 'true'
      - run: python -m pytest -q
+        if: steps.docs-check.outputs.docs_only != 'true'
@@ -0,0 +1,52 @@
+# Container security: Dockerfile lint
+#
+# Purpose: the Docker image is how most people run Odysseus, so it is part of
+# the attack surface. hadolint lints the Dockerfile for mistakes and insecure
+# patterns (running as root longer than needed, unpinned base image, bad apt
+# usage). Blocking.
+#
+# The image vulnerability scan (Trivy, advisory) lives in its own file,
+# container-trivy.yml. Keeping it separate lets that advisory scan be
+# path-filtered and held to a read-only token on pull requests without
+# weakening this blocking gate, which must always report so a required check
+# never hangs.
+#
+# Note: a separate open PR (#120) proposes a local `scripts/scan_image.py`.
+# This job is complementary -- it is a CI gate, not a script a contributor has
+# to remember to run.
+
+name: Container scan
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+permissions: {}
+
+concurrency:
+  group: container-scan-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  hadolint:
+    name: hadolint (Dockerfile lint)
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Lint Dockerfile
+        uses: hadolint/hadolint-action@2332a7b74a6de0dda2e2221d575162eba76ba5e5  # v3.3.0
+        with:
+          dockerfile: Dockerfile
+          # DL3008: pinning apt package versions is impractical on a -slim base
+          # image. Debian purges old package versions from its repos, so a
+          # pinned version breaks future rebuilds. The base image itself is
+          # what should be pinned (tracked by Dependabot's docker ecosystem).
+          ignore: DL3008
@@ -0,0 +1,125 @@
+# Container image vulnerability scan (advisory)
+#
+# Trivy builds the application image and scans it for known-vulnerable OS and
+# Python packages. Advisory only -- it reports findings to the repo's Security
+# tab without blocking a merge, because the image inevitably contains
+# already-known CVEs in upstream packages that are not this project's bug.
+#
+# Split from the Dockerfile lint (container-scan.yml) for two reasons:
+#
+#   - Least privilege. The image build runs Dockerfile instructions, which on a
+#     pull request are attacker-influenceable. That path (the `scan` job) is
+#     held to a read-only token and never publishes results. Only `publish`,
+#     which runs on push to main (curated, fast-forwarded from reviewed dev),
+#     gets security-events:write to upload SARIF.
+#   - Cost. Docs-only changes do not rebuild the image (paths-ignore below),
+#     matching docker-publish.yml. hadolint stays on the broad trigger in
+#     container-scan.yml so the blocking gate always reports.
+
+name: Container scan (Trivy)
+
+on:
+  pull_request:
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+      - '.github/ISSUE_TEMPLATE/**'
+  push:
+    branches: [main]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+      - '.github/ISSUE_TEMPLATE/**'
+  workflow_dispatch:
+
+permissions: {}
+
+concurrency:
+  group: container-trivy-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Pull requests and manual runs: build and scan under a read-only token.
+  # The build executes PR-supplied Dockerfile instructions, so this job must
+  # not hold any write scope, and it does not upload to the Security tab.
+  scan:
+    name: Trivy (image scan, advisory)
+    if: github.event_name != 'push'
+    runs-on: ubuntu-latest
+    # Advisory: a CVE in an upstream package must not block a PR.
+    continue-on-error: true
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5  # v4.1.0
+
+      # Build without pushing so a broken Dockerfile is caught here, and the
+      # exact image we ship is what gets scanned.
+      - name: Build image
+        uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf  # v7.2.0
+        with:
+          context: .
+          push: false
+          load: true
+          tags: odysseus:ci
+
+      - name: Scan image with Trivy
+        uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25  # v0.36.0
+        with:
+          image-ref: odysseus:ci
+          format: table
+          ignore-unfixed: true
+        env:
+          # Pin the vuln DB source to GHCR to avoid rate-limited Docker Hub
+          # mirrors that flake on shared runners.
+          TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
+
+  # Push to main only: build, scan, and publish SARIF to the Security tab.
+  # This is the only path that runs trusted code, so it is the only one granted
+  # security-events:write.
+  publish:
+    name: Trivy (image scan + SARIF upload)
+    if: github.event_name == 'push'
+    runs-on: ubuntu-latest
+    continue-on-error: true
+    permissions:
+      contents: read
+      security-events: write  # upload SARIF to the Security tab
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5  # v4.1.0
+
+      - name: Build image
+        uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf  # v7.2.0
+        with:
+          context: .
+          push: false
+          load: true
+          tags: odysseus:ci
+
+      - name: Scan image with Trivy
+        uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25  # v0.36.0
+        with:
+          image-ref: odysseus:ci
+          format: sarif
+          output: trivy-results.sarif
+          ignore-unfixed: true
+        env:
+          TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
+
+      - name: Upload Trivy results
+        uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e  # v4.36.2
+        with:
+          sarif_file: trivy-results.sarif
+          category: trivy-image
@@ -0,0 +1,71 @@
+# Supply-chain review
+#
+# Purpose: defend against supply-chain attacks -- a pull request that adds (or
+# bumps) a dependency to a version with a known vulnerability. (No license
+# allow/deny list is configured yet, so licenses are not enforced.) Two layers:
+#
+#   - dependency-review: runs ONLY on pull requests. It compares the
+#     dependencies before and after the PR and blocks the merge if the change
+#     pulls in a package with a moderate-or-worse advisory. This is the gate.
+#   - pip-audit: scans the project's current Python requirements against the
+#     advisory database. Advisory only (it never blocks a merge), because it can
+#     flag a pre-existing issue in an already-shipped dependency.
+
+name: Dependency review
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+# Default-deny token; jobs grant only read access.
+permissions: {}
+
+concurrency:
+  group: dependency-review-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  dependency-review:
+    name: dependency-review (PR gate)
+    # Only meaningful on a pull request -- it needs a base..head diff to review.
+    if: github.event_name == 'pull_request'
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Review dependency changes
+        uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294  # v5.0.0
+        with:
+          # Fail the PR on any newly introduced moderate-or-worse advisory.
+          fail-on-severity: moderate
+
+  pip-audit:
+    name: pip-audit (advisory)
+    runs-on: ubuntu-latest
+    # Advisory: report known-vulnerable Python deps without blocking the merge.
+    continue-on-error: true
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      - name: Run pip-audit on requirements
+        run: |
+          set -euo pipefail
+          pip install pip-audit==2.10.0
+          pip-audit -r requirements.txt -r requirements-optional.txt --strict
@@ -0,0 +1,140 @@
+name: ci / docker publish
+
+# Build the Odysseus image and publish to GHCR.
+#   push to main -> :latest, :X.Y.Z            (curated release; main is fast-forwarded at releases)
+#   push to dev  -> :dev,    :X.Y.Z-dev.<sha>  (rolling dev + an immutable, traceable pin)
+# Multi-arch (linux/amd64 + linux/arm64): each arch builds on its own native
+# runner and pushes by digest, then a merge job stitches the digests into one
+# manifest list and applies the tags (faster + cleaner than QEMU emulation).
+# Registry: ghcr.io/<owner>/<repo>.
+
+on:
+  push:
+    branches: [dev, main]
+    paths-ignore:
+      - '**.md'
+      - 'docs/**'
+      - '.github/ISSUE_TEMPLATE/**'
+
+concurrency:
+  group: docker-publish-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+env:
+  REGISTRY: ghcr.io
+  IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+  build:
+    name: build (${{ matrix.arch }})
+    runs-on: ${{ matrix.runner }}
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - platform: linux/amd64
+            arch: amd64
+            runner: ubuntu-latest
+          - platform: linux/arm64
+            arch: arm64
+            runner: ubuntu-24.04-arm
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5  # v4.1.0
+      - name: Log in to GHCR
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee  # v4.2.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf  # v7.2.0
+        with:
+          context: .
+          platforms: ${{ matrix.platform }}
+          outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true
+          cache-from: type=gha,scope=${{ matrix.arch }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.arch }}
+      - name: Export digest
+        run: |
+          mkdir -p /tmp/digests
+          digest="${{ steps.build.outputs.digest }}"
+          touch "/tmp/digests/${digest#sha256:}"
+      - name: Upload digest
+        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
+        with:
+          name: digest-${{ matrix.arch }}
+          path: /tmp/digests/*
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    name: merge manifest + tag
+    runs-on: ubuntu-latest
+    needs: build
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+      - name: Read APP_VERSION + short sha
+        id: ver
+        run: |
+          v=$(grep -E '^APP_VERSION' src/constants.py | head -1 | sed -E 's/.*"([^"]+)".*/\1/')
+          [ -n "$v" ] || { echo "APP_VERSION not found"; exit 1; }
+          echo "version=$v" >> "$GITHUB_OUTPUT"
+          echo "short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT"
+      - name: Download digests
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c  # v8.0.1
+        with:
+          path: /tmp/digests
+          pattern: digest-*
+          merge-multiple: true
+      - name: Set up Buildx
+        uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5  # v4.1.0
+      - name: Log in to GHCR
+        uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee  # v4.2.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Compute tags
+        id: meta
+        uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9  # v6.1.0
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+            type=raw,value=${{ steps.ver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
+            type=raw,value=dev,enable=${{ github.ref == 'refs/heads/dev' }}
+            type=raw,value=${{ steps.ver.outputs.version }}-dev.${{ steps.ver.outputs.short }},enable=${{ github.ref == 'refs/heads/dev' }}
+      - name: Create manifest list + push tags
+        working-directory: /tmp/digests
+        run: |
+          tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON")
+          digests=$(printf "${REGISTRY}/${IMAGE_NAME}@sha256:%s " *)
+          # word-splitting is intended: $tags and $digests each expand to multiple args
+          # shellcheck disable=SC2086
+          docker buildx imagetools create $tags $digests
+        env:
+          REGISTRY: ${{ env.REGISTRY }}
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
+      - name: Inspect
+        run: |
+          if [ "$GITHUB_REF" = "refs/heads/main" ]; then ref=latest; else ref=dev; fi
+          docker buildx imagetools inspect "${REGISTRY}/${IMAGE_NAME}:${ref}"
+        env:
+          REGISTRY: ${{ env.REGISTRY }}
+          IMAGE_NAME: ${{ env.IMAGE_NAME }}
@@ -14,10 +14,11 @@ jobs:
    # Skip bots (Dependabot, release-drafter, etc.)
    if: ${{ github.event.issue.user.type != 'Bot' }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          sparse-checkout: .github/scripts
+          persist-credentials: false

-      - uses: actions/github-script@v7
+      - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3  # v9.0.0
        with:
          script: return require('./.github/scripts/check-issue-description.js')({github, context, core})
@@ -1,28 +1,109 @@
-name: ci / PR description check
+name: ci / PR checks

 on:
-  pull_request_target:
-    types: [opened, edited, synchronize, reopened]
+  # pull_request_target runs in the base-repo context (has secrets) so the check
+  # works on fork PRs. Safe here: the checkout pins to the base branch (no fork
+  # code runs) and the scripts only read context.payload and call the GitHub API.
+  pull_request_target:  # zizmor: ignore[dangerous-triggers]
+    types: [opened, edited, synchronize, reopened, ready_for_review]

-# pull_request_target runs in the base-repo context (has secrets).
-# The checkout below pins to the base branch so no fork code is executed.
-# The script only reads context.payload and calls the GitHub API.
-permissions:
-  issues: write
-  pull-requests: write
+# Default-deny at the workflow level; each job opts into only the scopes it needs.
+# Note: modifying a PR's labels/comments needs pull-requests:write even though the
+# REST path is under /issues/{n}/...; issues:write alone returns 403 on PRs.
+permissions: {}

 jobs:
  check-description:
    name: Check PR description
    runs-on: ubuntu-latest
-    # Skip bots — they open PRs programmatically and have their own process.
+    permissions:
+      contents: read
+      pull-requests: write
+      issues: write
+    # Skip bots: they open PRs programmatically and have their own process.
    if: github.event.pull_request.user.type != 'Bot'
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
        with:
          ref: ${{ github.base_ref }}
          sparse-checkout: .github/scripts
+          persist-credentials: false

-      - uses: actions/github-script@v7
+      - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3  # v9.0.0
        with:
          script: return require('./.github/scripts/check-pr-description.js')({github, context, core})
+
+  check-title:
+    name: Check PR title (Conventional Commits)
+    runs-on: ubuntu-latest
+    permissions: {}
+    # Skip bots: they open PRs programmatically and have their own process.
+    if: github.event.pull_request.user.type != 'Bot'
+    steps:
+      - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3  # v9.0.0
+        with:
+          script: |
+            const title = context.payload.pull_request.title || "";
+            // Conventional Commits: type(optional-scope)(optional !): summary
+            const re = /^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([\w .\/-]+\))?!?: .+/;
+            if (!re.test(title)) {
+              core.setFailed(
+                `PR title is not in Conventional Commits format:\n  "${title}"\n\n` +
+                `Expected: type(scope): summary\n` +
+                `Example:  fix(search): handle empty query\n` +
+                `Types: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert.`
+              );
+            } else {
+              core.info(`PR title OK: ${title}`);
+            }
+
+  check-mergeable:
+    name: Flag unmergeable PRs
+    runs-on: ubuntu-latest
+    permissions:
+      pull-requests: write
+      issues: write
+    # Skip bots: they open PRs programmatically and have their own process.
+    if: github.event.pull_request.user.type != 'Bot'
+    steps:
+      - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3  # v9.0.0
+        with:
+          script: |
+            const repo = { owner: context.repo.owner, repo: context.repo.repo };  // spread into every REST call below
+            const number = context.payload.pull_request.number;
+            const READY = "ready for review";  // label removed while the PR conflicts
+            const CONFLICT = "merge conflict";  // label this script adds and removes
+
+            // Ensure the conflict label exists (red): any getLabel failure falls through to a best-effort createLabel whose own errors are swallowed.
+            try {
+              await github.rest.issues.getLabel({ ...repo, name: CONFLICT });
+            } catch {
+              await github.rest.issues.createLabel({
+                ...repo, name: CONFLICT, color: "B60205",
+                description: "Conflicts with the base branch; needs a rebase before review.",
+              }).catch(() => {});
+            }
+
+            // mergeable is computed asynchronously and is often null right after
+            // an event, so poll up to 5 times, 3 s apart, until GitHub has resolved it.
+            let pr = null;
+            for (let i = 0; i < 5; i++) {
+              const { data } = await github.rest.pulls.get({ ...repo, pull_number: number });
+              if (data.mergeable !== null) { pr = data; break; }
+              await new Promise(r => setTimeout(r, 3000));
+            }
+            if (!pr || pr.draft) return;  // still unresolved after polling, or a draft PR: leave labels untouched
+            const labels = pr.labels.map(l => l.name);
+
+            if (pr.mergeable === false) {  // conflicting: drop READY (if present) and add CONFLICT (if absent)
+              if (labels.includes(READY)) {
+                await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: READY }).catch(() => {});
+              }
+              if (!labels.includes(CONFLICT)) {
+                await github.rest.issues.addLabels({ ...repo, issue_number: number, labels: [CONFLICT] });
+              }
+            } else if (pr.mergeable === true) {  // mergeable again: clear a stale CONFLICT label
+              if (labels.includes(CONFLICT)) {
+                await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: CONFLICT }).catch(() => {});
+              }
+            }
@@ -0,0 +1,60 @@
+# Secret scanning
+#
+# Purpose: stop credentials (API keys, tokens, passwords, private keys) from
+# ever living in the Git history. Odysseus deliberately keeps real secrets in
+# files that are gitignored (.env, data/), but a slip in a future commit -- or a
+# malicious pull request that sneaks one in -- would otherwise go unnoticed.
+# This job reads the repository and the full commit history and fails if it
+# finds anything that looks like a secret.
+#
+# It runs the official gitleaks BINARY directly (pinned to an exact version and
+# verified against the project's published SHA-256 checksum) rather than the
+# gitleaks GitHub Action, because the Action asks for a paid license on
+# organization-owned repos. The binary is free and behaves identically.
+
+name: Secret scan
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+# Start with zero permissions; the single job opts back in to read-only.
+permissions: {}
+
+concurrency:
+  group: secret-scan-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  gitleaks:
+    name: gitleaks
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          # Full history so a secret committed in an earlier commit (and later
+          # deleted) is still caught -- deletion does not remove it from Git.
+          fetch-depth: 0
+          persist-credentials: false
+
+      # Pinned version + checksum so a tampered release binary cannot run here.
+      # Bump VERSION/SHA256 together; the checksum comes from the matching
+      # gitleaks_<version>_checksums.txt on the GitHub release.
+      - name: Run gitleaks (pinned, checksum-verified)
+        env:
+          GITLEAKS_VERSION: 8.30.1
+          GITLEAKS_SHA256: 551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb
+        run: |
+          set -euo pipefail
+          TARBALL="gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz"
+          curl -fsSL -o "${TARBALL}" \
+            "https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/${TARBALL}"
+          echo "${GITLEAKS_SHA256}  ${TARBALL}" | sha256sum -c -
+          tar -xzf "${TARBALL}" gitleaks
+          # Scan the whole history. Findings print to the log and fail the job.
+          ./gitleaks git --no-banner --redact --verbose .
@@ -0,0 +1,80 @@
+# Workflow security (CI that audits the CI)
+#
+# Purpose: the GitHub Actions workflows themselves are an attack surface. A
+# poorly written workflow can leak the repository token, run attacker-supplied
+# code from a pull request, or pull in a tampered third-party action. These two
+# tools check every workflow file in this repo for those mistakes:
+#
+#   - actionlint: catches workflow syntax errors and shell-script bugs inside
+#     `run:` steps before they reach main.
+#   - zizmor: a security linter for Actions. Flags template-injection holes,
+#     unpinned actions, credential persistence, and over-broad token
+#     permissions -- exactly the patterns the rest of this CI is built to avoid.
+#
+# Add this early: it then audits every workflow added after it.
+
+name: Workflow security
+
+on:
+  pull_request:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+# Default-deny token; each job grants only read access to the code.
+permissions: {}
+
+concurrency:
+  group: workflow-security-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  actionlint:
+    name: actionlint
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      # Pinned version + checksum so a tampered binary cannot run here.
+      - name: Run actionlint (pinned, checksum-verified)
+        env:
+          ACTIONLINT_VERSION: 1.7.12
+          ACTIONLINT_SHA256: 8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8
+        run: |
+          set -euo pipefail
+          TARBALL="actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
+          curl -fsSL -o "${TARBALL}" \
+            "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/${TARBALL}"
+          echo "${ACTIONLINT_SHA256}  ${TARBALL}" | sha256sum -c -
+          tar -xzf "${TARBALL}" actionlint
+          ./actionlint -color
+
+  zizmor:
+    name: zizmor (Actions SAST)
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6.0.3
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
+        with:
+          python-version: '3.12'
+
+      # Pinned zizmor release. --offline keeps the audit hermetic (no network
+      # calls about the actions it inspects); --min-severity=low reports every
+      # finding rated low or higher (informational findings are filtered out).
+      - name: Run zizmor
+        run: |
+          set -euo pipefail
+          pip install zizmor==1.25.2
+          zizmor --offline --min-severity=low .github/workflows/
@@ -14,6 +14,15 @@ venv/
 .env
 .env.bak.*
 !.env.example
+# Local uv lockfile (optional, per-platform — see "Faster installs with uv" in README)
+requirements.lock
+
+# SOPS workflow — encrypted `secrets.env` is intentionally committable,
+# but every `secrets.env.*` variant (plaintext, manual decrypt copy, editor
+# backup) must stay out of git. Mirrored in .dockerignore (which also excludes
+# `secrets.env` itself) so the same artifacts cannot enter image build layers.
+secrets.env.*
+!secrets.env.example

 # Data — all user data stays local
 data/
@@ -61,6 +70,9 @@ output.txt.txt
 *.tiff
 *.pdf

+# …except shipped static assets
+!static/icons/*.png
+
 # …except shipped demo assets in docs/ that the README links to.
 !docs/*.jpg
 !docs/*.jpeg
@@ -89,3 +101,4 @@ docs/windows-port/
 compound.config.json
 *.error.log
 _scratch/
+/odysseus/
@@ -94,6 +94,18 @@ Before submitting any change that affects what the app looks like — buttons, i

 If you are unsure whether a change is "visual," it is. Default to attaching a screenshot.

+## Code conventions
+
+Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed.
+
+- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import.
+- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`).
+- **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal.
+
+If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files.
+
+**Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious.
+
 ## Issue Reports

 For bugs, include:
@@ -1,4 +1,4 @@
-FROM python:3.12-slim
+FROM python:3.14-slim

 # System deps. tmux is required by Cookbook for background downloads/serves.
 # openssh-client is required for Cookbook remote server tests, setup, probes,
@@ -1,21 +1,235 @@
-MIT License
+GNU AFFERO GENERAL PUBLIC LICENSE
+Version 3, 19 November 2007

-Copyright (c) 2025 Odysseus Contributors
+Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>

-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+                            Preamble

-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software.
+
+The licenses for most software and other practical works are designed to take away your freedom to share and change the works.  By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users.
+
+When we speak of free software, we are referring to freedom, not price.  Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.
+
+Developers that use our General Public Licenses protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License which gives you legal permission to copy, distribute and/or modify the software.
+
+A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate.  Many developers of free software are heartened and encouraged by the resulting cooperation.  However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public.
+
+The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community.  It requires the operator of a network server to provide the source code of the modified version running there to the users of that server.  Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version.
+
+An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals.  This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license.
+
+The precise terms and conditions for copying, distribution and modification follow.
+
+                       TERMS AND CONDITIONS
+
+0. Definitions.
+
+"This License" refers to version 3 of the GNU Affero General Public License.
+
+"Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.
+
+"The Program" refers to any copyrightable work licensed under this License.  Each licensee is addressed as "you".  "Licensees" and "recipients" may be individuals or organizations.
+
+To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy.  The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work.
+
+A "covered work" means either the unmodified Program or a work based on the Program.
+
+To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy.  Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.
+
+To "convey" a work means any kind of propagation that enables other parties to make or receive copies.  Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.
+
+An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License.  If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.
+
+1. Source Code.
+The "source code" for a work means the preferred form of the work for making modifications to it.  "Object code" means any non-source form of a work.
+
+A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.
+
+The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form.  A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.
+
+The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities.  However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work.  For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.
+
+The Corresponding Source for a work in source code form is that same work.
+
+2. Basic Permissions.
+All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met.  This License explicitly affirms your unlimited permission to run the unmodified Program.  The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work.  This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.
+
+You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force.  You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright.  Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.
+
+Conveying under any other circumstances is permitted solely under the conditions stated below.  Sublicensing is not allowed; section 10 makes it unnecessary.
+
+3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.
+
+When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.
+
+4. Conveying Verbatim Copies.
+You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.
+
+You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.
+
+5. Conveying Modified Source Versions.
+You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7.  This requirement modifies the requirement in section 4 to "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy.  This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged.  This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.
+
+A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit.  Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.
+
+6. Conveying Non-Source Forms.
+You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source.  This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.
+
+    d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge.  You need not require recipients to copy the Corresponding Source along with the object code.  If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source.  Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.
+
+A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.
+
+A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling.  In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage.  For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product.  A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.
+
+"Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source.  The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made.
+
+If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information.  But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM).
+
+The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed.  Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network.
+
+Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying.
+
+7. Additional Terms.
+"Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law.  If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions.
+
+When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it.  (Additional permissions may be written to require their own removal in certain cases when you modify the work.)  You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission.
+
+Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.
+
+All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10.  If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term.  If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.
+
+If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.
+
+Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.
+
+8. Termination.
+
+You may not propagate or modify a covered work except as expressly provided under this License.  Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).
+
+However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.
+
+Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.
+
+Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License.  If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.
+
+9. Acceptance Not Required for Having Copies.
+
+You are not required to accept this License in order to receive or run a copy of the Program.  Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance.  However, nothing other than this License grants you permission to propagate or modify any covered work.  These actions infringe copyright if you do not accept this License.  Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.
+
+10. Automatic Licensing of Downstream Recipients.
+
+Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License.  You are not responsible for enforcing compliance by third parties with this License.
+
+An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations.  If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.
+
+You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License.  For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.
+
+11. Patents.
+
+A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based.  The work thus licensed is called the contributor's "contributor version".
+
+A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version.  For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.
+
+Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.
+
+In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement).  To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.
+
+If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients.  "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.
+
+If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.
+
+A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License.  You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.
+
+Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.
+
+12. No Surrender of Others' Freedom.
+
+If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License.  If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all.  For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.
+
+13. Remote Network Interaction; Use with the GNU General Public License.
+
+Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software.  This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph.
+
+Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work.  The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License.
+
+14. Revised Versions of this License.
+
+The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time.  Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation.  If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation.
+
+If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program.
+
+Later license versions may give you additional or different permissions.  However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.
+
+15. Disclaimer of Warranty.
+
+THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. Limitation of Liability.
+
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+17. Interpretation of Sections 15 and 16.
+
+If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.
+
+END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.
+
+To do so, attach the following notices to the program.  It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.
+
+     <one line to give the program's name and a brief idea of what it does.>
+     Copyright (C) <year>  <name of author>
+
+     This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more details.
+
+     You should have received a copy of the GNU Affero General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source.  For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code.  There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements.
+
+You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <http://www.gnu.org/licenses/>.
@@ -1,426 +1,65 @@
-# Odysseus
+<p align="center">
+  <img src="docs/odysseus-wordmark.png" alt="Odysseus" width="280">
+</p>

-```
-───────────────────────────────────────────────
- ⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ  Odysseus vers. 1.0
-───────────────────────────────────────────────
-```
+<p align="center">
+  A self-hosted AI workspace for chat, agents, research, documents, email, notes, calendar, and local model workflows.
+</p>

-![Odysseus](docs/odysseus.jpg)
+<p align="center">
+  <a href="#quick-start">Quick Start</a> ·
+  <a href="docs/setup.md">Setup Guide</a> ·
+  <a href="CONTRIBUTING.md">Contributing</a> ·
+  <a href="ROADMAP.md">Roadmap</a>
+</p>

-A self-hosted AI workspace -- meant to be the self-hosted version of the UI experience you get from ChatGPT and Claude. But with more jank and fun. Running on your own hardware, with your own data -- local-first, privacy-first, and no trojan.
+<p align="center">
+  <a href="https://repology.org/project/odysseus-ai/versions"><img src="https://repology.org/badge/vertical-allrepos/odysseus-ai.svg" alt="Packaging status"></a>
+</p>

-## Features
-  - **Chat** -- chat with any local model or API; adding them is super simple.<br>　<sub>vLLM · llama.cpp · Ollama · OpenRouter · OpenAI · GitHub Copilot</sub>
-  - **Agent** -- hand it tools and let it run the whole task itself.<br>　<sub>built on [opencode](https://github.com/anomalyco/opencode) · MCP · web · files · shell · skills · memory</sub>
-  - **Cookbook** -- Scans your hardware, recommends models, click to download and serve.. easy!<br>　<sub>built on [llmfit](https://github.com/AlexsJones/llmfit) · VRAM-aware · GGUF / FP8 / AWQ · fit scoring · vLLM / llama.cpp serving</sub>
-  - **Deep Research** -- multi-step runs that gather, read, and synthesize sources into a nice visual report.<br>　<sub>adapted from [Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)</sub>
-  - **Compare** -- a fun tool to compare models side by side. Test completely blind, no bias!<br>　<sub>multi-model · blind test · synthesis</sub>
-  - **Documents** -- YOU write the text, AI is there to assist, not the opposite.<br>　<sub>multi-tab editor · markdown · HTML · CSV · syntax highlighting · AI edits · suggestions</sub>
-  - **Memory / Skills** -- Persistent memory and skills, your agent evolves over time as it better understands you and your tasks!<br>　<sub>ChromaDB · fastembed (ONNX) · vector + keyword retrieval · import/export</sub>
-  - **Email** -- IMAP/SMTP inbox with AI triage built in: urgency reminders, auto-tag, auto-summary, auto-reply drafts, auto-spam.<br>　<sub>IMAP · SMTP · per-account routing · CalDAV-aware</sub>
-  - **Notes & Tasks** -- Quick notes with reminders, a todo list, and scheduled tasks the agent can act on.<br>　<sub>note pings · checklist · cron-style tasks · ntfy / browser / email channels</sub>
-  - **Calendar** -- Local-first calendar with CalDAV sync to Radicale / Nextcloud / Apple / Fastmail.<br>　<sub>CalDAV pull · .ics import/export · per-calendar colors · agent-aware</sub>
-  - **Works on mobile** -- looks and runs great on your phone, not just desktop.<br>　<sub>responsive · installable (PWA) · touch gestures</sub>
-  - **Extras** -- more to explore, happy if you give it a go!<br>　<sub>image editor · theme editor · file uploads (vision + PDF) · web search · presets · sessions · 2FA</sub>
+<p align="center">
+  <img src="docs/odysseus.jpg" alt="Odysseus interface">
+</p>

-## Demo
-A full, hover-to-play tour lives on the landing page (`docs/index.html`).
-
-<details>
-<summary>Screenshots / clips</summary>
-
-### Chat & Agents
-![Chat & Agents](docs/chat.gif)
-### Deep Research
-![Deep Research](docs/research.gif)
-### Compare
-![Compare](docs/compare.gif)
-### Documents
-![Documents](docs/document.gif)
-### Notes & Tasks
-![Notes & Tasks](docs/notes.gif)
-
-</details>
+---

 ## Quick Start

-Defaults work out of the box: clone, run, then configure models/search/email
-inside **Settings**. Only edit `.env` for deployment-level overrides like
-`APP_BIND`, `APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password.
+> `dev` is the default branch and gets the newest changes first. Use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main) if you want the more curated branch.

-On first setup, Odysseus creates an admin account (`admin` unless
-`ODYSSEUS_ADMIN_USER` is set) and prints a temporary password in the terminal.
-For Docker installs, the same line is in `docker compose logs odysseus`.
-Use that for the first login, then change it in **Settings**.
-
-Contributing? See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and
-pull request guidelines.
-
-### Docker (recommended)
 ```bash
 git clone https://github.com/pewdiepie-archdaemon/odysseus.git
 cd odysseus
-cp .env.example .env       # optional, but recommended for explicit defaults
+cp .env.example .env
 docker compose up -d --build
 ```
-To include optional extras in the image (PDF viewer, Office extraction; includes AGPL PyMuPDF), build with `docker compose build --build-arg INSTALL_OPTIONAL=true` before `up`.

-Open `http://localhost:7000` when the containers are healthy. Docker Compose
-binds the web UI to `127.0.0.1` by default. If the port is taken, set
-`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
-only when you intentionally want LAN/reverse-proxy access.
+Open `http://localhost:7000` when the containers are healthy. The first admin password is printed in `docker compose logs odysseus`.

-### Native Linux / macOS
-```bash
-git clone https://github.com/pewdiepie-archdaemon/odysseus.git
-cd odysseus
-python3 -m venv venv
-source venv/bin/activate
-pip install -r requirements.txt
-python setup.py
-python -m uvicorn app:app --host 127.0.0.1 --port 7000
-```
-Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
-downloads and serves. The app itself is lightweight; local model serving is the
-heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
-connect to API or remote model servers instead. Use `--host 0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
+Native installs, GPU notes, Windows/macOS instructions, HTTPS, and configuration live in the [setup guide](docs/setup.md).

-### Apple Silicon
-Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
-M-series Mac, run Odysseus natively:
+## Features

-```bash
-git clone https://github.com/pewdiepie-archdaemon/odysseus.git
-cd odysseus
-./start-macos.sh
-```
+- **Chat + Agents** — local/API models, tools, MCP, files, shell, skills, and memory.
+- **Cookbook** — hardware-aware model recommendations, downloads, and serving.
+- **Deep Research** — multi-step web research with source reading and report generation.
+- **Compare** — blind side-by-side model testing and synthesis.
+- **Documents** — writing-first editor with AI edits, suggestions, Markdown, HTML, CSV, and syntax highlighting.
+- **Email** — IMAP/SMTP inbox with triage, tags, summaries, reminders, and reply drafts.
+- **Notes, Tasks + Calendar** — reminders, todos, scheduled agent tasks, and CalDAV sync.
+- **Extras** — gallery/image editor, themes, uploads, web search, presets, sessions, and 2FA.

-It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
+## Demo

-```bash
-ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
-# then open http://<tailscale-ip>:7860
-```
-
-The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
-set there are picked up automatically without a command-line override each run.
-
-Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not
-expose this port directly to the public internet. To build a clickable app wrapper:
-
-```bash
-./build-macos-app.sh
-```
-
-<details>
-<summary>Cookbook, GPU, Ollama, and troubleshooting notes</summary>
-
-**Docker bundled services.** Compose starts Odysseus, ChromaDB, SearXNG, and
-ntfy. Odysseus and the bundled service ports bind to `127.0.0.1` by default, so
-they are reachable from the host but not exposed to your LAN/public internet
-unless you opt in.
-
-**Cookbook storage in Docker.** Downloads live in `./data/huggingface`
-(`~/.cache/huggingface` in the container). Cookbook-installed Python CLIs and
-serve engines live in `./data/local` (`~/.local` in the container), so they
-survive container recreation.
-
-**Remote servers.** In **Cookbook -> Settings -> Servers**, generate the
-Odysseus SSH key and add the public key to the remote server's
-`~/.ssh/authorized_keys`. From the host you can also run:
-
-```bash
-ssh-copy-id -i data/ssh/id_ed25519.pub user@server
-```
-
-**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
-only detect GPUs that Docker exposes to the container — if the host runtime or
-device passthrough is not configured, Cookbook sees the iGPU, another card, or
-CPU instead of your intended GPU.
-
-For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
-optionally install the host runtime or update `.env`.
-
-```bash
-# Read-only diagnostic (default — installs nothing, never edits .env):
-scripts/check-docker-gpu.sh
-
-# Print OS-specific install commands without running them:
-scripts/check-docker-gpu.sh --print-install-commands
-
-# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
-scripts/check-docker-gpu.sh --install-nvidia-toolkit
-
-# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
-scripts/check-docker-gpu.sh --enable-nvidia-overlay
-
-# Full assisted setup — install toolkit, then enable overlay if passthrough works:
-scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
-```
-
-Safety notes:
- The app never installs host GPU runtime automatically.
- The app never edits `.env` automatically.
- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
-  and only after GPU passthrough succeeds. `--yes` skips prompts but does not
-  bypass the passthrough gate.
- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
-  Git and the Docker build context.
-
-To enable manually without the script, add this to `.env`:
-
-```bash
-COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
-```
-
-**AMD / ROCm.** AMD setup is read-only diagnostic plus manual `.env` edit. Run:
-
-```bash
-scripts/check-docker-amd-gpu.sh
-```
-
-Then add the reported values to `.env`, replacing `RENDER_GID` with your host's
-numeric render group id:
-
-```bash
-COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
-RENDER_GID=989
-```
-
-For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: docker/gpu.nvidia.yml or docker/gpu.amd.yml.
-
-**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools
-often accept only a single Compose file and do not reliably honor `COMPOSE_FILE`
-or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE`
-overlay workflow above. For stack UIs, point the stack at one of the standalone
-files instead, which bundle the base stack plus the GPU settings:
-
- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit
-  on the host.
- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the
-  `video`/`render` group membership, and `RENDER_GID` when needed.
-
-The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the
-source of truth; the standalone files mirror them for single-file deployments.
-
-Verify after enabling either overlay:
-
-```bash
-docker compose exec odysseus nvidia-smi -L   # NVIDIA
-docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*'  # AMD
-```
-
-> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
-> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
-> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
-> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
-> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
-> not a Docker passthrough failure. Re-install the serve engine via
-> **Cookbook → Dependencies** to get a CUDA-enabled build.
->
-> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
-> the container confirms device passthrough, not ROCm userspace or a
-> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
-> inside the slim Odysseus image.
-
-**Ollama with Docker.** If Ollama runs on the host, add this endpoint in
-Settings:
-
-```text
-http://host.docker.internal:11434/v1
-```
-
-Ollama must listen outside its own loopback interface:
-
-```bash
-OLLAMA_HOST=0.0.0.0:11434 ollama serve
-```
-
-This connects Odysseus in Docker to an Ollama server that is already running on
-your host machine; it does not start Ollama inside the container.
-`host.docker.internal` is Docker's hostname for the host machine from inside the
-container. Cookbook **Serve** is a separate workflow for serving downloaded
-models through Odysseus/llama.cpp, so Windows users with an existing Ollama
-install usually only need to add the endpoint in Settings.
-
-**Useful checks.**
-
-```bash
-docker compose ps
-docker compose logs --tail=120 odysseus
-docker compose logs odysseus | grep -E 'ChromaDB|MemoryVectorStore|DEGRADED'
-```
-
-**macOS details.** `start-macos.sh` installs Homebrew deps, creates the venv,
-runs setup, and starts uvicorn on port `7860` because AirPlay often holds
-`7000`. It uses llama.cpp/Ollama for Metal. vLLM/SGLang are CUDA/ROCm-only and
-do not run on macOS. MLX-only models are not served by Odysseus.
-
-</details>
-
-### Native Windows
-
-**One-command launcher** (creates the venv, installs deps, runs setup, starts the
-server; safe to re-run):
-
-```powershell
-git clone https://github.com/pewdiepie-archdaemon/odysseus.git
-cd odysseus
-powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
-```
-
-Or do it by hand:
-
-```powershell
-git clone https://github.com/pewdiepie-archdaemon/odysseus.git
-cd odysseus
-py -3.11 -m venv venv
-venv\Scripts\Activate.ps1
-pip install -r requirements.txt
-python setup.py
-python -m uvicorn app:app --host 127.0.0.1 --port 7000
-```
-
-If `python` points at an older interpreter, use `py -3.12` (or another installed
-3.11+ version) for the venv step.
-
-**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
-email, calendar, deep research) runs fully native. For full **Cookbook** background
-model downloads and the agent shell tool, also install
-[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`).
-Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows,
-[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at
-`http://localhost:11434/v1` in Settings.
-
-Open `http://localhost:7000`, log in with the generated admin password,
-and configure everything else inside **Settings**.
-
-## Troubleshooting & Advanced Setup
-
-### `chromadb-client` conflicts with embedded ChromaDB
-If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
-
-**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
-```bash
-./venv/bin/pip uninstall chromadb-client -y
-./venv/bin/pip install --force-reinstall chromadb
-```
-
-### HTTPS + LAN/Tailscale exposure
-To expose Odysseus on a local network or Tailscale with HTTPS:
-1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
-2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert):
-   ```bash
-   mkcert -install
-   mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
-   ```
-3. Run `uvicorn` with the generated certs:
-   ```bash
-   python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
-   ```
-4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
-
-### Optional Dependencies
-`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
-
-| Package | Feature unlocked |
-|---------|-----------------|
-| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
-| `duckduckgo-search` | DuckDuckGo as a search provider option. |
-| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
-| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
-
-## Security Notes
-Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.
-
- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
- Keep `LOCALHOST_BYPASS=false` outside local development.
- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
- Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
- Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
- Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
- If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
- Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.
-
-### Private or proxied deployments
-Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:
-
-1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
-2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
-3. Put the authenticated Odysseus web/API entrypoint behind that layer.
-4. Keep raw service and model ports internal-only.
-
-Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
-
-Common internal-only ports from the default docs/compose setup:
-
-| Port | Service |
-|---|---|
-| `7000` | Odysseus raw app port |
-| `8080` | SearXNG |
-| `8091` | ntfy |
-| `8100` | ChromaDB host port for manual/compose access |
-| `11434` | Ollama |
-| `8000-8020` | Common local model/provider APIs |
+A full hover-to-play tour lives on the landing page: [`docs/index.html`](docs/index.html).

 ## Contributing
-Help is welcome. The best entry points are fresh-install testing, provider setup
-bugs, mobile/editor polish, docs, and small focused refactors. See
-[ROADMAP.md](ROADMAP.md) for the current help-wanted list.

-## Configuration
-Most setup is done inside the app with `/setup` or **Settings**. Use `.env`
-for deployment-level defaults and secrets you want present before first boot.
-Key settings:
+Help is welcome. The best entry points are fresh-install testing, provider setup bugs, mobile/editor polish, docs, and small focused refactors. See [CONTRIBUTING.md](CONTRIBUTING.md) and [ROADMAP.md](ROADMAP.md).

-| Variable | Default | Description |
-|---|---|---|
-| `LLM_HOST` | `localhost` | Your LLM server (e.g. `llm-host.local:8000`) |
-| `LLM_HOSTS` | -- | Comma-separated list for model discovery |
-| `OPENAI_API_KEY` | -- | Optional OpenAI key. Prefer adding providers in the app unless pre-seeding. |
-| `SEARXNG_INSTANCE` | `http://localhost:8080` | SearXNG URL. Docker overrides this to `http://searxng:8080`. |
-| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
-| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
-| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
-| `AUTH_ENABLED` | `true` | Enable/disable login |
-| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
-| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
-| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
-| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
-| `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |
-| `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint |
+## Security

-### Built-in MCP servers (optional setup)
-
-Odysseus auto-registers a few built-in MCP servers at startup. The npx-based ones (currently the browser server, `@playwright/mcp`) only start when their npm package is already in the local npx cache. If a package isn't cached, that server is skipped with a startup log message explaining what to do, so a fresh install does not block on a multi-minute npm download or hang if Playwright system deps are missing.
-
-To enable the browser MCP (page navigation, screenshots, vision), run once:
-
-```bash
-npx -y @playwright/mcp@latest --version
-```
-
-That installs `@playwright/mcp` plus Playwright (~300MB total). Restart Odysseus and the server will register at startup.
-
-## Architecture
-```
-app.py                   # FastAPI entry point
-core/      auth, database, middleware, constants
-src/       llm_core, agent_loop, agent_tools, chat_processor, search/
-routes/    chat, session, document, memory, model … endpoints
-services/  docs, memory, search, hwfit (Cookbook) …
-static/    index.html + app.js + style.css + js/ (modular front-end)
-docs/      landing page (index.html) + preview clips
-```
-
-## Data
-All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
-`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
+Odysseus is a self-hosted workspace with powerful local tools. Keep auth enabled, keep private data out of Git, and do not expose raw model/service ports publicly. Deployment details are in the [setup guide](docs/setup.md#security-notes).

 ## Star History

@@ -433,19 +72,5 @@ All user data lives in `data/` (gitignored): `app.db` (sessions, messages, docum
 </a>

 ## License
-MIT -- see [LICENSE](LICENSE) and [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md).

-```
-                                  |
-                                 |||
-                                |||||
-                  |    |    |   |||||||
-                 )_)  )_)  )_)   ~|~
-                )___))___))___)\  |
-               )____)____)_____)\\|
-             _____|____|____|_____\\\__
-             \                       /
-       ~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~
-               ~^~  all aboard!  ~^~
-       ~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~
-```
+AGPL-3.0-or-later -- see [LICENSE](LICENSE) and [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md).
@@ -47,15 +47,16 @@ from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.middleware.gzip import GZipMiddleware

 # Core imports
 from core.constants import (
    BASE_DIR, STATIC_DIR, SESSIONS_FILE,
-    REQUEST_TIMEOUT, OPENAI_API_KEY,
+    REQUEST_TIMEOUT, OPENAI_API_KEY, AUTH_FILE,
 )
 from core.database import SessionLocal, ApiToken
-from core.middleware import SecurityHeadersMiddleware
-from core.auth import AuthManager
+from core.middleware import SecurityHeadersMiddleware, is_cors_preflight
+from core.auth import AuthManager, normalize_known_username
 from core.exceptions import (
    SessionNotFoundError, InvalidFileUploadError,
    LLMServiceError, WebSearchError,
@@ -64,13 +65,41 @@ from core.exceptions import (
 import bcrypt as _bcrypt

 from src.app_helpers import abs_join
+from src.generated_images import GENERATED_IMAGE_HEADERS, resolve_generated_image_path
 from starlette.responses import RedirectResponse

 # ========= LOGGING =========
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-)
+import logging.handlers
+from core.constants import DATA_DIR
+
+_root_logger = logging.getLogger()
+_root_logger.setLevel(logging.INFO)
+_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+# Clear existing handlers to avoid duplicates
+for _h in list(_root_logger.handlers):
+    _root_logger.removeHandler(_h)
+
+_console_h = logging.StreamHandler()
+_console_h.setFormatter(_formatter)
+_root_logger.addHandler(_console_h)
+
+try:
+    _log_dir = os.path.join(DATA_DIR, "logs")
+    os.makedirs(_log_dir, exist_ok=True)
+    _log_file = os.path.join(_log_dir, "app.log")
+
+    # RotatingFileHandler is not multi-process safe (e.g. uvicorn --workers N);
+    # Odysseus is single-process by convention, so this is acceptable.
+    # Any failure here is non-fatal: logging stays console-only (see except).
+    _file_h = logging.handlers.RotatingFileHandler(
+        _log_file, maxBytes=5 * 1024 * 1024, backupCount=3, encoding="utf-8"
+    )
+    _file_h.setFormatter(_formatter)
+    _root_logger.addHandler(_file_h)
+except Exception as e:
+    _root_logger.warning("Failed to initialize file logging handler (falling back to console-only)", exc_info=e)
+
 logger = logging.getLogger(__name__)

 # ========= APP =========
@@ -103,6 +132,16 @@ app.add_middleware(
    ],
 )

+# ========= RESPONSE COMPRESSION (gzip) =========
+# The frontend's text assets (style.css, index.html, the JS bundles) shipped
+# uncompressed on every cold load. gzip cuts CSS/JS/HTML by ~75-85% on the wire
+# with no behavioural change. Starlette's GZipMiddleware excludes
+# `text/event-stream` by default, so the SSE streams (chat, shell, research,
+# model-probe — all served with media_type="text/event-stream") are never
+# compressed or buffered; only complete bodies over minimum_size are. The
+# security-header middleware composes cleanly on top.
+app.add_middleware(GZipMiddleware, minimum_size=1024, compresslevel=6)
+
 # ========= SECURITY HEADERS MIDDLEWARE =========
 app.add_middleware(SecurityHeadersMiddleware)

@@ -128,6 +167,7 @@ _TIMEOUT_EXEMPT_PREFIXES = (
    "/api/cookbook/setup",  # remote pacman/apt installs
    "/api/upload",          # large files
    "/api/image",           # diffusion proxies (inpaint/harmonize/upscale/etc.) — own 120s httpx timeout
+    "/api/memory/audit",    # retains own 120s LLM inactivity timeout
 )


@@ -216,8 +256,16 @@ if AUTH_ENABLED:
        try:
            rows = db.query(ApiToken).filter(ApiToken.is_active == True).all()
            for r in rows:
+                owner_key = normalize_known_username(auth_manager.users, getattr(r, "owner", None))
+                if not owner_key:
+                    logger.warning(
+                        "Ignoring active API token '%s' for unknown auth user '%s'",
+                        getattr(r, "id", ""),
+                        getattr(r, "owner", None),
+                    )
+                    continue
                scopes = [s.strip() for s in (getattr(r, "scopes", "") or "chat").split(",") if s.strip()]
-                new_map[r.token_prefix].append((r.id, r.token_hash, getattr(r, "owner", None), scopes))
+                new_map[r.token_prefix].append((r.id, r.token_hash, owner_key, scopes))
        finally:
            db.close()
        _token_cache.clear()
@@ -252,6 +300,15 @@ if AUTH_ENABLED:
    class AuthMiddleware(BaseHTTPMiddleware):
        async def dispatch(self, request: Request, call_next):
            path = request.url.path
+            # A genuine CORS preflight (OPTIONS + Access-Control-Request-Method)
+            # carries no credentials by design and must reach CORSMiddleware to be
+            # answered. AuthMiddleware is the outermost middleware, so requiring
+            # auth here would reject the preflight with a 401 before CORS can
+            # respond -- blocking every cross-origin browser/WebView client before
+            # the real request is sent. Let real preflights through (only OPTIONS
+            # with the ACRM header; never a credentialed request).
+            if is_cors_preflight(request.method, request.headers):
+                return await call_next(request)
            if _is_auth_exempt(path):
                return await call_next(request)
            # In-process internal-tool token bypass. Used by the agent
@@ -274,8 +331,8 @@ if AUTH_ENABLED:
                        request.state.current_user = "internal-tool"
                    request.state.api_token = False
                    return await call_next(request)
-            except Exception:
-                pass
+            except Exception as _e:
+                logger.warning("Internal tool auth header check failed", exc_info=_e)
            # Allow DIRECT localhost requests (internal service calls from
            # heartbeats etc.). Tunnel/proxy-forwarded requests are excluded by
            # _is_trusted_loopback so LOCALHOST_BYPASS can't be abused over a
@@ -328,11 +385,10 @@ if AUTH_ENABLED:
                                    _db.close()
                            try:
                                await _asyncio.to_thread(_do)
-                            except Exception:
-                                pass
+                            except Exception as _e:
+                                logger.debug("Failed to update token last_used_at", exc_info=_e)
                        _asyncio.create_task(_touch_last_used(matched_id))
                        # Keep bearer-token callers out of normal cookie/user
-                        # routes. API-aware routes can read api_token_owner.
                        request.state.current_user = "api"
                        request.state.api_token = True
                        request.state.api_token_id = matched_id
@@ -387,13 +443,7 @@ app.mount("/static", _RevalidatingStatic(directory="static"), name="static")
@app.get("/api/generated-image/{filename}")
 async def serve_generated_image(filename: str, request: Request):
    """Serve generated images from the data directory."""
-    from pathlib import Path
-    import re
-    if not re.match(r'^[a-f0-9]{8,64}\.(png|jpg|jpeg|webp|gif|mp4|mov|webm|mkv|m4v)$', filename):
-        raise HTTPException(status_code=400, detail="Invalid filename")
-    img_path = Path("data/generated_images") / filename
-    if not img_path.exists():
-        raise HTTPException(status_code=404, detail="Image not found")
+    img_path = resolve_generated_image_path(filename)
    # SECURITY: filename is the only key, so anyone who knows / guesses a
    # 12-hex content hash could pull another user's image bytes. Require
    # auth and verify ownership via the gallery row (when one exists).
@@ -413,8 +463,8 @@ async def serve_generated_image(filename: str, request: Request):
                _db.close()
    except HTTPException:
        raise
-    except Exception:
-        pass
+    except Exception as _e:
+        logger.warning("Image ownership verification failed for %r", filename, exc_info=_e)
    ext = filename.rsplit('.', 1)[-1].lower()
    mime = {
        "png": "image/png", "jpg": "image/jpeg", "jpeg": "image/jpeg",
@@ -429,7 +479,7 @@ async def serve_generated_image(filename: str, request: Request):
    return FileResponse(
        str(img_path),
        media_type=mime,
-        headers={"Cache-Control": "public, max-age=31536000, immutable"},
+        headers=GENERATED_IMAGE_HEADERS,
    )

 # ========= YOUTUBE INIT =========
@@ -468,14 +518,20 @@ components = initialize_managers(BASE_DIR, rag_manager)
 session_manager   = components["session_manager"]
 from src.assistant_log import set_session_manager as _set_asst_sm
 _set_asst_sm(session_manager)
+# Set the global session manager singleton (used by core.models.Session.add_message)
+from core.models import set_session_manager_instance
+set_session_manager_instance(session_manager)
+app.state.session_manager = session_manager
 memory_manager    = components["memory_manager"]
 memory_vector     = components.get("memory_vector")
 upload_handler    = components["upload_handler"]
+app.state.upload_handler = upload_handler
 personal_docs_mgr = components["personal_docs_manager"]
 api_key_manager   = components["api_key_manager"]
 preset_manager    = components["preset_manager"]
 chat_processor    = components["chat_processor"]
 research_handler  = components["research_handler"]
+app.state.research_handler = research_handler
 chat_handler      = components["chat_handler"]
 model_discovery   = components["model_discovery"]
 skills_manager    = components["skills_manager"]
@@ -525,9 +581,6 @@ upload_cleanup_task = None
 from routes.emoji_routes import setup_emoji_routes
 app.include_router(setup_emoji_routes())

-from routes.workspace_routes import setup_workspace_routes
-app.include_router(setup_workspace_routes())
-
 # Sessions
 from routes.session_routes import setup_session_routes
 session_config = {"REQUEST_TIMEOUT": REQUEST_TIMEOUT, "OPENAI_API_KEY": OPENAI_API_KEY, "SESSIONS_FILE": SESSIONS_FILE}
@@ -572,7 +625,7 @@ app.include_router(setup_preset_routes(preset_manager))

 # Diagnostics
 from routes.diagnostics_routes import setup_diagnostics_routes
-app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler))
+app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler, memory_vector))

 # Cleanup
 from routes.cleanup_routes import setup_cleanup_routes
@@ -594,6 +647,10 @@ app.include_router(setup_model_routes(model_discovery))
 from routes.copilot_routes import setup_copilot_routes
 app.include_router(setup_copilot_routes())

+# ChatGPT Subscription device-flow login
+from routes.chatgpt_subscription_routes import setup_chatgpt_subscription_routes
+app.include_router(setup_chatgpt_subscription_routes())
+
 # TTS
 from routes.tts_routes import setup_tts_routes
 app.include_router(setup_tts_routes(tts_service))
@@ -646,6 +703,9 @@ app.include_router(setup_shell_routes())
 from routes.cookbook_routes import setup_cookbook_routes
 app.include_router(setup_cookbook_routes())

+from routes.workspace_routes import setup_workspace_routes
+app.include_router(setup_workspace_routes())
+
 # Hardware model fitting (cookbook "What Fits?" tab)
 from routes.hwfit_routes import setup_hwfit_routes
 app.include_router(setup_hwfit_routes())
@@ -789,6 +849,8 @@ async def serve_backgrounds(request: Request):

@app.get("/login")
 async def serve_login(request: Request):
+    if not AUTH_ENABLED:
+        return RedirectResponse(url="/", status_code=302)
    return _serve_html_with_nonce(request, abs_join(BASE_DIR, "static/login.html"))

@app.get("/api/version")
@@ -916,16 +978,21 @@ async def _startup_event():
    async def _warmup_endpoints():
        try:
            import httpx
-            endpoints = model_discovery.get_endpoints() if model_discovery else []
-            for ep in endpoints[:5]:
-                url = ep.get("url", "").replace("/chat/completions", "/models")
-                if url:
-                    try:
-                        async with httpx.AsyncClient(timeout=5.0) as client:
-                            await client.get(url)
-                        logger.info(f"Warmup ping OK: {url}")
-                    except Exception as e:
-                        logger.debug(f"Warmup ping failed for endpoint: {e}")
+            # model_discovery has no get_endpoints(); that call raised
+            # AttributeError every run and silently disabled warmup/keepalive.
+            # Resolve the /models probe URLs via the real discovery API, off the
+            # event loop since discovery does a blocking port scan.
+            urls = (
+                await asyncio.to_thread(model_discovery.warmup_ping_urls)
+                if model_discovery else []
+            )
+            for url in urls:
+                try:
+                    async with httpx.AsyncClient(timeout=5.0) as client:
+                        await client.get(url)
+                    logger.info(f"Warmup ping OK: {url}")
+                except Exception as e:
+                    logger.debug(f"Warmup ping failed for endpoint: {e}")
        except Exception as e:
            logger.debug(f"Warmup ping skipped: {e}")

@@ -948,7 +1015,7 @@ async def _startup_event():
        owners = set()
        try:
            import json as _json
-            auth_path = "data/auth.json"
+            auth_path = AUTH_FILE
            with open(auth_path, encoding="utf-8") as f:
                users = _json.load(f).get("users", {})
            owners.update(users.keys())
@@ -995,7 +1062,7 @@ async def _startup_event():
    # does not make an existing library look empty after auth/account changes.
    try:
        import json as _json
-        auth_path = "data/auth.json"
+        auth_path = AUTH_FILE
        with open(auth_path, encoding="utf-8") as f:
            users = _json.load(f).get("users", {})
        primary_owner = None
@@ -1067,6 +1134,16 @@ async def _startup_event():
                logger.warning(f"Nightly skill audit failed: {e}")

    _startup_tasks.append(asyncio.create_task(_skill_audit_nightly_loop()))
+
+    # Cookbook serve lifecycle — kills scheduler-launched serves whose
+    # window-end has passed. Paired with the cookbook_serve builtin
+    # action; both are no-ops unless a scheduled task actually launches
+    # something with end_after_min set. Removing this line + the
+    # cookbook_serve entry in BUILTIN_ACTIONS + src/cookbook_serve_lifecycle.py
+    # removes the feature.
+    from src.cookbook_serve_lifecycle import cookbook_serve_lifecycle_loop
+    _startup_tasks.append(asyncio.create_task(cookbook_serve_lifecycle_loop()))
+
    logger.info("Application startup complete")

 async def _shutdown_event():
@@ -14,6 +14,8 @@ import uuid

 import bcrypt

+from src.constants import AUTH_FILE
+
 PAIRING_VERSION = 1
 COMPANION_SCOPE = "chat"

@@ -61,7 +63,7 @@ def lan_ip_candidates() -> list[str]:
 def find_admin_user() -> str | None:
    """Resolve an admin username from data/auth.json (schema uses is_admin),
    falling back to the first user."""
-    auth_path = os.path.join("data", "auth.json")
+    auth_path = AUTH_FILE
    try:
        with open(auth_path, "r", encoding="utf-8") as f:
            data = json.load(f)
@@ -3,6 +3,7 @@ Authentication module — multi-user password hashing, session tokens, config pe
 Config stored in data/auth.json. Uses bcrypt directly.
 """

+import enum
 import json
 import os
 import secrets
@@ -30,14 +31,24 @@ DEFAULT_PRIVILEGES = {
    "can_manage_memory": True,
    "max_messages_per_day": 0,
    "allowed_models": [],
+    "allowed_models_restricted": False,
+    # Explicit "block every model" sentinel. An empty `allowed_models` list is
+    # ambiguous — it's also what gets sent when the admin clicks "[All]" — so
+    # we need a dedicated flag to express "this user may use no models at all"
+    # distinctly from "this user has no restriction".
+    "block_all_models": False,
 }

 # Admins get everything
 ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()}
+ADMIN_PRIVILEGES["allowed_models_restricted"] = False
+# Admins must never be blocked from using models — the generic dict
+# comprehension above flips every boolean default to True, which would be
+# backwards for this sentinel.
+ADMIN_PRIVILEGES["block_all_models"] = False

-DEFAULT_AUTH_PATH = os.path.join(
-    Path(__file__).parent.parent, "data", "auth.json"
-)
+from src.constants import AUTH_FILE
+DEFAULT_AUTH_PATH = AUTH_FILE
 TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days

 # Usernames the auth + middleware layer reserve as internal "synthetic owner"
@@ -57,6 +68,14 @@ TOKEN_TTL = 60 * 60 * 24 * 7  # 7 days
 RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"})


+def normalize_known_username(users: Dict[str, Any], username: str | None) -> Optional[str]:
+    """Return *username* stripped and lowercased if it is a key of *users*, else None."""
+    candidate = str(username or "").strip().lower()
+    if candidate and candidate in users:
+        return candidate
+    return None
+
+
 def _hash_password(password: str) -> str:
    return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")

@@ -65,6 +84,15 @@ def _verify_password(password: str, hashed: str) -> bool:
    return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))


+class SetAdminResult(enum.Enum):
+    """Outcome of AuthManager.set_admin, so callers can map each case to a
+    precise response instead of guessing from a bare bool."""
+    OK = "ok"                           # presumably: change applied -- confirm in set_admin
+    USER_NOT_FOUND = "user_not_found"   # presumably: target username is unknown -- confirm
+    NOT_AUTHORIZED = "not_authorized"   # requester is not an admin
+    LAST_ADMIN = "last_admin"           # would remove the last remaining admin
+
+
 class AuthManager:
    """Manages multi-user password + session-token auth system."""

@@ -76,12 +104,17 @@ class AuthManager:
        # Guards mutations of self._sessions and the on-disk sessions.json.
        # Validate/create/revoke run concurrently from the FastAPI threadpool.
        self._sessions_lock = threading.RLock()
+        # Guards all mutations of self._config and the on-disk auth.json so
+        # concurrent create/delete/rename/privilege operations don't interleave
+        # and corrupt the user database.
+        self._config_lock = threading.Lock()
        # Guards the first-run setup check-and-write so concurrent requests
        # cannot both observe is_configured==False and both create admin accounts.
        self._setup_lock = threading.Lock()
        self._load()
        self._load_sessions()
        self._migrate_single_user()
+        self._drop_reserved_loaded_users()
        self._migrate_legacy_admin_role()

    def _load(self):
@@ -134,7 +167,13 @@ class AuthManager:
    def _migrate_single_user(self):
        """Migrate old single-user format to multi-user format."""
        if "password_hash" in self._config and "users" not in self._config:
-            old_user = self._config.get("username", "admin")
+            old_user = str(self._config.get("username", "admin") or "admin").strip().lower()
+            if old_user in RESERVED_USERNAMES:
+                logger.warning(
+                    "Migrating legacy single-user reserved username '%s' to 'admin'",
+                    old_user,
+                )
+                old_user = "admin"
            old_hash = self._config["password_hash"]
            self._config = {
                "users": {
@@ -148,6 +187,30 @@ class AuthManager:
            self._save()
            logger.info(f"Migrated single-user auth to multi-user (admin: {old_user})")

+    def _drop_reserved_loaded_users(self):
+        """Normalize loaded usernames and drop any that collide with reserved sentinels."""
+        loaded = self._config.get("users")
+        if not isinstance(loaded, dict):
+            return
+        kept = {}
+        dropped = []
+        for raw_name, record in loaded.items():
+            name = str(raw_name or "").strip().lower()
+            if name in RESERVED_USERNAMES:
+                dropped.append(name)
+            elif name:
+                kept[name] = record
+        # Persist only when something changed: a reserved row was dropped, a
+        # blank key was skipped, or a key was rewritten by normalization.
+        if dropped or kept != loaded:
+            self._config["users"] = kept
+            self._save()
+        if dropped:
+            logger.warning(
+                "Removed reserved username(s) from auth config: %s",
+                ", ".join(sorted(set(dropped))),
+            )
+
    def _migrate_legacy_admin_role(self):
        """Normalize setup.py's old role='admin' marker to is_admin=True."""
        changed = False
@@ -172,8 +235,9 @@ class AuthManager:

    @signup_enabled.setter
    def signup_enabled(self, value: bool):
-        self._config["signup_enabled"] = value
-        self._save()
+        with self._config_lock:
+            self._config["signup_enabled"] = value
+            self._save()

    @property
    def is_configured(self) -> bool:
@@ -198,17 +262,18 @@ class AuthManager:
        if username in RESERVED_USERNAMES:
            logger.warning("Refused to create reserved username '%s'", username)
            return False
-        if username in self.users:
-            return False
-        if "users" not in self._config:
-            self._config["users"] = {}
-        self._config["users"][username] = {
-            "password_hash": _hash_password(password),
-            "created": time.time(),
-            "is_admin": is_admin,
-            "privileges": dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES),
-        }
-        self._save()
+        with self._config_lock:
+            if username in self.users:
+                return False
+            if "users" not in self._config:
+                self._config["users"] = {}
+            self._config["users"][username] = {
+                "password_hash": _hash_password(password),
+                "created": time.time(),
+                "is_admin": is_admin,
+                "privileges": dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES),
+            }
+            self._save()
        logger.info(f"Created user '{username}' (admin={is_admin})")
        return True

@@ -221,14 +286,31 @@ class AuthManager:
        their cookie expired naturally (default ~30 days).
        """
        username = username.strip().lower()
-        if username not in self.users:
-            return False
-        if username == requesting_user:
-            return False
-        if not self.users.get(requesting_user, {}).get("is_admin"):
-            return False
-        del self._config["users"][username]
-        self._save()
+        with self._config_lock:
+            if username not in self.users:
+                return False
+            if username == requesting_user:
+                return False
+            if not self.users.get(requesting_user, {}).get("is_admin"):
+                return False
+            # Revoke API bearer tokens before removing the auth row. The bearer
+            # path authenticates from ApiToken rows and does not require the
+            # owner to still exist, so a successful delete must not leave active
+            # rows behind. If the token store is unavailable, fail closed and
+            # keep the user/session state intact so the admin can retry.
+            try:
+                from core.database import get_db_session, ApiToken
+                with get_db_session() as db:
+                    removed_tokens = db.query(ApiToken).filter(ApiToken.owner == username).delete()
+                if removed_tokens:
+                    logger.info(
+                        f"Revoked {removed_tokens} API token(s) owned by deleted user '{username}'"
+                    )
+            except Exception:
+                logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
+                return False
+            del self._config["users"][username]
+            self._save()
        # Purge all sessions belonging to this user. validate_token doesn't
        # cross-check `self.users`, so without this step a deleted user's
        # cookie keeps authenticating.
@@ -241,18 +323,6 @@ class AuthManager:
                revoked += 1
        if revoked:
            self._save_sessions()
-        # Also revoke API bearer tokens owned by this user. The bearer auth
-        # path authenticates straight against ApiToken rows and never
-        # re-checks that the owner still exists, so leaving the rows behind
-        # would let a deleted user keep full API access indefinitely.
-        try:
-            from core.database import get_db_session, ApiToken
-            with get_db_session() as db:
-                removed = db.query(ApiToken).filter(ApiToken.owner == username).delete()
-            if removed:
-                logger.info(f"Revoked {removed} API token(s) owned by deleted user '{username}'")
-        except Exception:
-            logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
        logger.info(f"Deleted user '{username}' (by {requesting_user}); revoked {revoked} active session(s)")
        return True

@@ -266,14 +336,15 @@ class AuthManager:
        if new_username in RESERVED_USERNAMES:
            logger.warning("Refused to rename '%s' into reserved username '%s'", old_username, new_username)
            return False
-        if old_username not in self.users:
-            return False
-        if new_username in self.users:
-            return False
-        if not self.users.get(requesting_user, {}).get("is_admin"):
-            return False
-        self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username)
-        self._save()
+        with self._config_lock:
+            if old_username not in self.users:
+                return False
+            if new_username in self.users:
+                return False
+            if not self.users.get(requesting_user, {}).get("is_admin"):
+                return False
+            self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username)
+            self._save()

        renamed_sessions = 0
        with self._sessions_lock:
@@ -311,28 +382,93 @@ class AuthManager:
    def set_privileges(self, username: str, privileges: Dict[str, Any]) -> bool:
        """Update privileges for a user. Can't modify admin privileges."""
        username = username.strip().lower()
-        if username not in self.users:
-            return False
-        if self.users[username].get("is_admin"):
-            return False  # admins always have full access
-        # Only allow known privilege keys
-        current = self.get_privileges(username)
-        for k, v in privileges.items():
-            if k in DEFAULT_PRIVILEGES:
-                current[k] = v
-        self._config["users"][username]["privileges"] = current
-        self._save()
+        with self._config_lock:  # checks, merge and save run as one critical section
+            if username not in self.users:
+                return False
+            if self.users[username].get("is_admin"):
+                return False  # admins always have full access
+            # Only allow known privilege keys; keys not in DEFAULT_PRIVILEGES are silently ignored
+            current = self.get_privileges(username)  # NOTE(review): mutated below — assumes this returns a private copy; confirm
+            for k, v in privileges.items():
+                if k in DEFAULT_PRIVILEGES:
+                    current[k] = v  # values are stored as given (no type coercion)
+            self._config["users"][username]["privileges"] = current
+            self._save()
        logger.info(f"Updated privileges for '{username}': {current}")
        return True

+    def set_admin(self, username: str, is_admin: bool,
+                  requesting_user: str) -> SetAdminResult:
+        """Promote/demote an existing user to/from admin. Admin only.
+
+        Refuses to remove the last remaining admin so the instance can never
+        be locked out of admin access; self-demotion is allowed as long as
+        another admin remains. Admin status is re-checked live on every
+        request, so unlike delete/rename no session or token revocation is
+        needed — a demoted admin simply fails the next is_admin() gate.
+
+        Promotion stashes the user's current privilege map and demotion
+        restores it, so a temporary admin stint can't silently broaden a
+        user's non-admin access; users without a stash (created as admin,
+        or promoted before stashing existed) demote to DEFAULT_PRIVILEGES.
+
+        Counting admins and flipping the flag happen in one critical section
+        so two concurrent demotions can't race the admin count to zero.
+        """
+        username = (username or "").strip().lower()
+        requesting_user = (requesting_user or "").strip().lower()
+        is_admin = bool(is_admin)
+        with self._config_lock:
+            if not self.users.get(requesting_user, {}).get("is_admin"):
+                return SetAdminResult.NOT_AUTHORIZED  # checked before the lookup: no username oracle for non-admins
+            target = self._config.get("users", {}).get(username)
+            if target is None:
+                return SetAdminResult.USER_NOT_FOUND
+            currently_admin = bool(target.get("is_admin"))
+            if currently_admin == is_admin:
+                return SetAdminResult.OK  # no-op; leave privileges untouched
+            if currently_admin and not is_admin:
+                admin_count = sum(1 for d in self.users.values() if d.get("is_admin"))
+                if admin_count <= 1:
+                    return SetAdminResult.LAST_ADMIN
+            # Write order matters for lock-free readers: get_privileges()
+            # reads without _config_lock and trusts is_admin, so the admin
+            # flag must be flipped while the stored map is safe to expose —
+            # before writing admin privileges on promote, after restoring
+            # the pre-admin map on demote.
+            if is_admin:
+                target["is_admin"] = True
+                # Stash the pre-admin map so a later demotion can restore it.
+                # While is_admin is set the stored map is inert: get_privileges
+                # short-circuits to ADMIN_PRIVILEGES and set_privileges refuses
+                # admins, so only set_admin ever touches the stash.
+                target["privileges_before_admin"] = dict(
+                    target.get("privileges") or DEFAULT_PRIVILEGES
+                )
+                target["privileges"] = dict(ADMIN_PRIVILEGES)
+            else:
+                # Restore the stashed pre-admin map. Fall back to defaults for
+                # users created as admins (their stored map is ADMIN_PRIVILEGES,
+                # which must not leak past demotion — e.g. can_use_bash) and
+                # for admins promoted before the stash existed.
+                target["privileges"] = dict(
+                    target.pop("privileges_before_admin", None)
+                    or DEFAULT_PRIVILEGES
+                )
+                target["is_admin"] = False
+            self._save()
+        logger.info("Set is_admin=%s for '%s' (by '%s')", is_admin, username, requesting_user)
+        return SetAdminResult.OK
+
    def change_password(self, username: str, current_password: str, new_password: str) -> bool:
        username = username.strip().lower()
-        if username not in self.users:
-            return False
-        if not _verify_password(current_password, self.users[username]["password_hash"]):
-            return False
-        self._config["users"][username]["password_hash"] = _hash_password(new_password)
-        self._save()
+        with self._config_lock:  # verify + write atomically: a concurrent delete/rename can't land between check and update
+            if username not in self.users:
+                return False
+            if not _verify_password(current_password, self.users[username]["password_hash"]):
+                return False
+            self._config["users"][username]["password_hash"] = _hash_password(new_password)
+            self._save()
        return True

    # ------------------------------------------------------------------
@@ -350,8 +486,9 @@ class AuthManager:
        if username not in self.users:
            return None
        secret = pyotp.random_base32()
-        self._config["users"][username]["totp_secret_pending"] = secret
-        self._save()
+        with self._config_lock:
+            self._config["users"][username]["totp_secret_pending"] = secret
+            self._save()
        return secret

    def totp_get_provisioning_uri(self, username: str, secret: str) -> str:
@@ -370,13 +507,14 @@ class AuthManager:
        if not totp.verify(code, valid_window=1):
            return False
        # Enable 2FA
-        self._config["users"][username]["totp_secret"] = secret
-        self._config["users"][username]["totp_enabled"] = True
-        self._config["users"][username].pop("totp_secret_pending", None)
-        # Generate backup codes
-        backup = [secrets.token_hex(4) for _ in range(8)]
-        self._config["users"][username]["totp_backup_codes"] = backup
-        self._save()
+        with self._config_lock:
+            self._config["users"][username]["totp_secret"] = secret
+            self._config["users"][username]["totp_enabled"] = True
+            self._config["users"][username].pop("totp_secret_pending", None)
+            # Generate backup codes
+            backup = [secrets.token_hex(4) for _ in range(8)]
+            self._config["users"][username]["totp_backup_codes"] = backup
+            self._save()
        logger.info(f"2FA enabled for '{username}'")
        return True

@@ -395,9 +533,10 @@ class AuthManager:
        # Check backup codes first
        backup = user.get("totp_backup_codes", [])
        if code in backup:
-            backup.remove(code)
-            self._config["users"][username]["totp_backup_codes"] = backup
-            self._save()
+            with self._config_lock:
+                backup.remove(code)
+                self._config["users"][username]["totp_backup_codes"] = backup
+                self._save()
            logger.info(f"Backup code used for '{username}' ({len(backup)} remaining)")
            return True
        totp = pyotp.TOTP(secret)
@@ -408,11 +547,12 @@ class AuthManager:
        username = username.strip().lower()
        if not self.verify_password(username, password):
            return False
-        self._config["users"][username].pop("totp_secret", None)
-        self._config["users"][username].pop("totp_secret_pending", None)
-        self._config["users"][username].pop("totp_backup_codes", None)
-        self._config["users"][username]["totp_enabled"] = False
-        self._save()
+        with self._config_lock:
+            self._config["users"][username].pop("totp_secret", None)
+            self._config["users"][username].pop("totp_secret_pending", None)
+            self._config["users"][username].pop("totp_backup_codes", None)
+            self._config["users"][username]["totp_enabled"] = False
+            self._save()
        logger.info(f"2FA disabled for '{username}'")
        return True

@@ -431,6 +571,12 @@ class AuthManager:
        username = username.strip().lower()
        if not self.verify_password(username, password):
            return None
+        return self.create_session_trusted(username)
+
+    def create_session_trusted(self, username: str) -> str:
+        """Issue a session token for an already-verified user.
+        Call only after verify_password (and TOTP if enabled) have passed."""
+        username = username.strip().lower()
        token = secrets.token_hex(32)
        with self._sessions_lock:
            self._sessions[token] = {
@@ -1,40 +1,12 @@
-# src/constants.py
-"""Application-wide constants and configuration values."""
-import os
+# core/constants.py
+"""Backward-compatible shim — the single source of truth is src/constants.py.

-APP_VERSION = "0.9.1"
-
-# Base paths
-BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/"
-STATIC_DIR = os.path.join(BASE_DIR, "static")
-DATA_DIR = os.path.join(BASE_DIR, "data")
-
-# Data file paths
-SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json")
-MEMORY_FILE = os.path.join(DATA_DIR, "memory.json")
-MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md")
-PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs")
-RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook")
-UPLOAD_DIR = os.path.join(DATA_DIR, "uploads")
-FEATURES_FILE = os.path.join(DATA_DIR, "features.json")
-SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json")
-
-# API Configuration
-MAX_CONTEXT_MESSAGES = 90
-REQUEST_TIMEOUT = 20
-OPENAI_COMPAT_PATH = "/v1/chat/completions"
-
-# Environment variables with defaults
-DEFAULT_HOST = os.getenv("LLM_HOST", "localhost")
-LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()]
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080')
-
-
-# Cleanup configuration
-CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true"
-CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24"))
-
-# Default parameters
-DEFAULT_TEMPERATURE = 1.0
-DEFAULT_MAX_TOKENS = 0
+Historically there were two copies of this module (this one lagged behind at
+APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To
+kill the drift, this now simply re-exports everything from src.constants so
+there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR.
+internal_api_base() also lives in src.constants now and is re-exported here so
+existing `from core.constants import internal_api_base` callers keep working.
+"""
+from src.constants import *  # noqa: F401,F403
+from src.constants import internal_api_base  # noqa: F401  (explicit: functions aren't covered by some linters' * checks)
@@ -29,8 +29,9 @@ class TimestampMixin:
    def updated_at(cls):
        return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False)

-# Get database URL from environment, default to SQLite
-DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/app.db")
+# Get database URL from environment, default to SQLite in DATA_DIR
+from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE
+DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db")

 # Create engine
 engine = create_engine(
@@ -323,6 +324,13 @@ class EmailAccount(TimestampMixin, Base):
    smtp_password  = Column(String, default="")

    from_address   = Column(String, default="")
+    display_name   = Column(String, nullable=True)   # "Hriday Ranka" — used in From: header
+
+    # OAuth2 (Google / Google Workspace). Tokens stored encrypted via secret_storage.
+    oauth_provider      = Column(String, nullable=True)   # "google" or None
+    oauth_access_token  = Column(String, nullable=True)   # encrypted
+    oauth_refresh_token = Column(String, nullable=True)   # encrypted
+    oauth_token_expiry  = Column(String, nullable=True)   # unix timestamp string

    __table_args__ = (
        Index('ix_email_accounts_owner_default', 'owner', 'is_default'),
@@ -360,6 +368,24 @@ class ModelEndpoint(TimestampMixin, Base):
    # is the historical default. When non-null, the model picker only shows
    # the endpoint to that user (admins always see everything).
    owner = Column(String, nullable=True, index=True)
+    # Optional OAuth/session-backed credential row. Used by subscription-backed
+    # providers that need refresh tokens instead of a static API key.
+    provider_auth_id = Column(String, nullable=True, index=True)
+
+
+class ProviderAuthSession(TimestampMixin, Base):
+    """Encrypted OAuth/session credentials for refresh-aware model providers."""
+    __tablename__ = "provider_auth_sessions"
+
+    id = Column(String, primary_key=True, index=True)  # presumably the value ModelEndpoint.provider_auth_id stores (plain String there, no ForeignKey) — confirm
+    provider = Column(String, nullable=False, index=True)  # required provider identifier
+    owner = Column(String, nullable=True, index=True)  # NOTE(review): NULL probably means shared/unowned, as on ModelEndpoint.owner — confirm
+    label = Column(String, nullable=True)  # optional; presumably a human-readable name — confirm
+    base_url = Column(String, nullable=False)  # required
+    access_token = Column(EncryptedText, nullable=True)  # EncryptedText column type (defined outside this view)
+    refresh_token = Column(EncryptedText, nullable=True)  # EncryptedText column type, same as access_token
+    last_refresh = Column(DateTime, nullable=True)  # NULL presumably means never refreshed — confirm
+    auth_mode = Column(String, nullable=True)  # NOTE(review): allowed values are not visible here — confirm

 class McpServer(TimestampMixin, Base):
    """Admin-configured MCP (Model Context Protocol) tool servers."""
@@ -669,6 +695,7 @@ def _migrate_add_last_message_at_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -694,10 +721,14 @@ def _migrate_add_last_message_at_column():
            "ON sessions(archived, last_message_at)"
        )
        conn.commit()
-        conn.close()
        logging.getLogger(__name__).info("Migrated: added + backfilled 'last_message_at' on sessions")
    except Exception as e:
        logging.getLogger(__name__).warning(f"last_message_at migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_document_archived_column():
    """Add `archived` to documents (soft-archive flag). Guarded + idempotent."""
@@ -705,6 +736,7 @@ def _migrate_add_document_archived_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(documents)")
@@ -713,9 +745,13 @@ def _migrate_add_document_archived_column():
            conn.execute("ALTER TABLE documents ADD COLUMN archived BOOLEAN DEFAULT 0")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'archived' to documents")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"documents.archived migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass


 def _migrate_add_owner_column():
@@ -724,6 +760,7 @@ def _migrate_add_owner_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -733,9 +770,13 @@ def _migrate_add_owner_column():
            conn.execute("CREATE INDEX IF NOT EXISTS ix_sessions_owner ON sessions(owner)")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'owner' column to sessions")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"Migration check failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_model_endpoints():
    """Recreate model_endpoints table if schema changed (url->base_url)."""
@@ -743,6 +784,7 @@ def _migrate_model_endpoints():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -751,9 +793,13 @@ def _migrate_model_endpoints():
            conn.execute("DROP TABLE IF EXISTS model_endpoints")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: dropped old model_endpoints table (schema change)")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_endpoints migration check failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_hidden_models_column():
    """Add hidden_models column to model_endpoints if it doesn't exist."""
@@ -761,6 +807,7 @@ def _migrate_add_hidden_models_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -769,9 +816,13 @@ def _migrate_add_hidden_models_column():
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN hidden_models TEXT")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'hidden_models' column to model_endpoints")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"hidden_models migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_model_endpoint_owner_column():
    """Add owner column to model_endpoints if it doesn't exist.
@@ -786,6 +837,7 @@ def _migrate_add_model_endpoint_owner_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -795,9 +847,38 @@ def _migrate_add_model_endpoint_owner_column():
            conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_owner ON model_endpoints(owner)")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'owner' column + index to model_endpoints")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_endpoints.owner migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+
+def _migrate_add_provider_auth_id_column():
+    """Add provider_auth_id column (+ index) to model_endpoints if missing; failures are logged as warnings, not raised."""
+    import sqlite3
+    db_path = DATABASE_URL.replace("sqlite:///", "")  # NOTE(review): assumes a sqlite:/// URL; any other URL passes through unchanged
+    if not os.path.exists(db_path):
+        return  # nothing exists at that path, so there is nothing to migrate
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
+        columns = [row[1] for row in cursor.fetchall()]  # row[1] is the column name in table_info output
+        if columns and "provider_auth_id" not in columns:  # empty list => PRAGMA returned no rows for the table; skip
+            conn.execute("ALTER TABLE model_endpoints ADD COLUMN provider_auth_id VARCHAR")
+            conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_provider_auth_id ON model_endpoints(provider_auth_id)")
+            conn.commit()
+            logging.getLogger(__name__).info("Migrated: added 'provider_auth_id' column + index to model_endpoints")
+    except Exception as e:
+        logging.getLogger(__name__).warning(f"model_endpoints.provider_auth_id migration failed: {e}")
+    finally:
+        try:
+            conn.close()  # conn is still None if connect() raised; the resulting AttributeError is swallowed below
+        except Exception:
+            pass


 def _migrate_add_model_type_column():
@@ -806,6 +887,7 @@ def _migrate_add_model_type_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -814,9 +896,13 @@ def _migrate_add_model_type_column():
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_type TEXT DEFAULT 'llm'")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'model_type' column to model_endpoints")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_type migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_model_endpoint_refresh_columns():
    """Add endpoint classification / refresh policy columns if missing."""
@@ -824,6 +910,7 @@ def _migrate_add_model_endpoint_refresh_columns():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -837,9 +924,13 @@ def _migrate_add_model_endpoint_refresh_columns():
        if columns and "model_refresh_timeout" not in columns:
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_refresh_timeout INTEGER")
        conn.commit()
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_endpoints refresh-policy migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_task_run_model_column():
    """Add model column to task_runs if it doesn't exist (records which model ran)."""
@@ -847,6 +938,7 @@ def _migrate_add_task_run_model_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(task_runs)")
@@ -855,9 +947,13 @@ def _migrate_add_task_run_model_column():
            conn.execute("ALTER TABLE task_runs ADD COLUMN model TEXT")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'model' column to task_runs")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"task_runs model migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_supports_tools_column():
    """Add supports_tools column to model_endpoints if it doesn't exist."""
@@ -865,6 +961,7 @@ def _migrate_add_supports_tools_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -873,9 +970,13 @@ def _migrate_add_supports_tools_column():
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN supports_tools BOOLEAN")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'supports_tools' column to model_endpoints")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"supports_tools migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass


 def _migrate_add_cached_models_column():
@@ -884,6 +985,7 @@ def _migrate_add_cached_models_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -891,9 +993,13 @@ def _migrate_add_cached_models_column():
        if columns and "cached_models" not in columns:
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN cached_models TEXT")
            conn.commit()
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"cached_models migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_pinned_models_column():
    """Add pinned_models column to model_endpoints if it doesn't exist."""
@@ -901,6 +1007,7 @@ def _migrate_add_pinned_models_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -909,9 +1016,13 @@ def _migrate_add_pinned_models_column():
            conn.execute("ALTER TABLE model_endpoints ADD COLUMN pinned_models TEXT")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'pinned_models' column to model_endpoints")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"pinned_models migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_notes_sort_order():
    """Add sort_order, image_url, repeat columns to notes if they don't exist."""
@@ -919,6 +1030,7 @@ def _migrate_add_notes_sort_order():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(notes)")
@@ -936,9 +1048,13 @@ def _migrate_add_notes_sort_order():
        if columns and "agent_session_id" not in columns:
            conn.execute("ALTER TABLE notes ADD COLUMN agent_session_id TEXT")
        conn.commit()
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"notes migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_mode_column():
    """Add mode column to sessions table if it doesn't exist."""
@@ -946,6 +1062,7 @@ def _migrate_add_mode_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -954,9 +1071,13 @@ def _migrate_add_mode_column():
            conn.execute("ALTER TABLE sessions ADD COLUMN mode TEXT")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'mode' column to sessions")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"Migration check for mode failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_folder_column():
    """Add folder column to sessions table if it doesn't exist."""
@@ -964,6 +1085,7 @@ def _migrate_add_folder_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -972,9 +1094,13 @@ def _migrate_add_folder_column():
            conn.execute("ALTER TABLE sessions ADD COLUMN folder TEXT")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'folder' column to sessions")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"Migration check for folder failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_token_columns():
    """Add cumulative token tracking columns to sessions table."""
@@ -982,6 +1108,7 @@ def _migrate_add_token_columns():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -991,9 +1118,13 @@ def _migrate_add_token_columns():
            conn.execute("ALTER TABLE sessions ADD COLUMN total_output_tokens INTEGER DEFAULT 0")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added token tracking columns to sessions")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"Migration check for token columns failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_owner_to_table(table_name: str, index_name: str):
    """Generic helper: add owner TEXT column + index to a table if missing."""
@@ -1001,6 +1132,7 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute(f"PRAGMA table_info({table_name})")
@@ -1010,9 +1142,13 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
            conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name}(owner)")
            conn.commit()
            logging.getLogger(__name__).info(f"Migrated: added 'owner' column to {table_name}")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"Migration owner column for {table_name} failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_add_multiuser_owner_columns():
    """Add owner column to memories, gallery_images, user_tools, comparisons."""
@@ -1037,6 +1173,7 @@ def _migrate_add_api_token_scopes_column():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        columns = [row[1] for row in conn.execute("PRAGMA table_info(api_tokens)").fetchall()]
@@ -1045,9 +1182,13 @@ def _migrate_add_api_token_scopes_column():
            conn.execute("UPDATE api_tokens SET scopes = 'chat' WHERE scopes IS NULL OR scopes = ''")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added scopes column to api_tokens")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"api_tokens.scopes migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def _migrate_assign_legacy_owner():
    """Assign all null-owner data to the first (admin) user.
@@ -1065,7 +1206,7 @@ def _migrate_assign_legacy_owner():
    # fell through to "first user" every time.
    auth_path = os.path.join(os.path.dirname(DATABASE_URL.replace("sqlite:///", "")), "auth.json")
    if not os.path.isabs(auth_path):
-        auth_path = os.path.join("data", "auth.json")
+        auth_path = AUTH_FILE
    admin_user = None
    try:
        with open(auth_path, "r", encoding="utf-8") as f:
@@ -1089,6 +1230,7 @@ def _migrate_assign_legacy_owner():
        return

    logger = logging.getLogger(__name__)
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        # Every table with an `owner` column. New tables added later will be
@@ -1113,12 +1255,16 @@ def _migrate_assign_legacy_owner():
            except Exception as e:
                logger.warning(f"Legacy owner assignment for {table} failed: {e}")
        conn.commit()
-        conn.close()
    except Exception as e:
        logger.warning(f"Legacy owner migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

    # Also migrate memory.json
-    mem_path = os.path.join("data", "memory.json")
+    mem_path = MEMORY_FILE
    try:
        if os.path.exists(mem_path):
            with open(mem_path, "r", encoding="utf-8") as f:
@@ -1136,7 +1282,7 @@ def _migrate_assign_legacy_owner():
        logger.warning(f"memory.json legacy migration failed: {e}")

    # Also migrate user_prefs.json to per-user format
-    prefs_path = os.path.join("data", "user_prefs.json")
+    prefs_path = USER_PREFS_FILE
    try:
        if os.path.exists(prefs_path):
            with open(prefs_path, "r", encoding="utf-8") as f:
@@ -1288,6 +1434,25 @@ def _migrate_add_task_automation_columns():
    except Exception as e:
        logging.getLogger(__name__).warning(f"task automation migration: {e}")

+def _migrate_add_email_oauth_columns():
+    """Ensure email_accounts has the Google OAuth and display_name columns.
+
+    Each missing column is added with ALTER TABLE; columns that already exist
+    are left alone. Any failure is logged as a warning and swallowed.
+    """
+    wanted = {
+        "oauth_provider": "TEXT",
+        "oauth_access_token": "TEXT",
+        "oauth_refresh_token": "TEXT",
+        "oauth_token_expiry": "TEXT",
+        "display_name": "TEXT",
+    }
+    try:
+        with engine.connect() as conn:
+            # PRAGMA table_info rows carry the column name at index 1.
+            existing = [row[1] for row in conn.execute(text("PRAGMA table_info(email_accounts)"))]
+            missing = [(name, sql_type) for name, sql_type in wanted.items() if name not in existing]
+            for name, sql_type in missing:
+                conn.execute(text(f"ALTER TABLE email_accounts ADD COLUMN {name} {sql_type}"))
+            conn.commit()
+    except Exception as e:
+        logging.getLogger(__name__).warning(f"email oauth columns migration: {e}")
+
+
 def _migrate_add_oauth_config():
    """Add oauth_config column to mcp_servers table if missing."""
    try:
@@ -1458,7 +1623,12 @@ class CalendarCal(TimestampMixin, Base):
    owner = Column(String, nullable=True, index=True)
    name  = Column(String, nullable=False)
    color = Column(String, default="#5b8abf")
-    source = Column(String, default="local")  # "local" or "timetree"
+    source = Column(String, default="local")  # "local" or "caldav"
+    # UUID of the CalDAV account in user prefs that owns this calendar.
+    # NULL for local calendars and for CalDAV calendars created before
+    # multi-account support was added (treated as "use any configured account").
+    account_id = Column(String, nullable=True, index=True)
+    caldav_base_url = Column(String, nullable=True)

    events = relationship("CalendarEvent", back_populates="calendar", cascade="all, delete-orphan")

@@ -1489,10 +1659,27 @@ class CalendarEvent(TimestampMixin, Base):
    # vanishes upstream). NULL/local = created locally (agent, email triage, or
    # a UI event whose write-back failed) and must NOT be pruned by the sync.
    origin      = Column(String, nullable=True, index=True)
+    remote_href = Column(String, nullable=True)        # CalDAV object URL for updates/deletes
+    remote_etag = Column(String, nullable=True)        # Last seen CalDAV ETag, when available
+    caldav_sync_pending = Column(String, nullable=True) # create | update | delete retry marker

    calendar = relationship("CalendarCal", back_populates="events")


+class CalendarDeletedEvent(TimestampMixin, Base):
+    """Hidden CalDAV delete tombstone retained until remote delete succeeds.
+
+    Rows are stored in the ``caldav_deleted_events`` table and keyed by
+    ``uid``. Every other column is nullable.
+    """
+    __tablename__ = "caldav_deleted_events"
+
+    # Primary key. NOTE(review): presumably the UID of the deleted calendar
+    # event -- confirm against the code that writes tombstones.
+    uid = Column(String, primary_key=True, index=True)
+    # Indexed owner; nullable. Presumably the owning username -- verify.
+    owner = Column(String, nullable=True, index=True)
+    # Indexed calendar reference; stored as a plain string, no ForeignKey.
+    calendar_id = Column(String, nullable=True, index=True)
+    # Mirrors CalendarEvent.remote_href / remote_etag (CalDAV object URL and
+    # last seen ETag) -- presumably copied from the event at delete time.
+    remote_href = Column(String, nullable=True)
+    remote_etag = Column(String, nullable=True)
+    # Same column name as CalendarCal.caldav_base_url; semantics assumed equal.
+    caldav_base_url = Column(String, nullable=True)
+    # NOTE(review): looks like the event title kept for display -- confirm.
+    summary = Column(String, nullable=True)
+    # Free-form text; presumably the last remote-delete failure -- confirm.
+    last_error = Column(Text, nullable=True)
+
+
 class Integration(TimestampMixin, Base):
    """An external service connection (email, RSS, webhook, etc.)."""
    __tablename__ = "integrations"
@@ -1526,7 +1713,7 @@ def _migrate_seed_email_account():
        import json as _json
        import uuid as _uuid
        from pathlib import Path
-        settings_file = Path("data/settings.json")
+        settings_file = Path(SETTINGS_FILE)
        if not settings_file.exists():
            return
        try:
@@ -1594,6 +1781,7 @@ def init_db():
    _migrate_add_model_type_column()
    _migrate_add_model_endpoint_refresh_columns()
    _migrate_add_model_endpoint_owner_column()
+    _migrate_add_provider_auth_id_column()
    _migrate_add_supports_tools_column()
    _migrate_add_task_run_model_column()
    _migrate_add_owner_column()
@@ -1609,6 +1797,7 @@ def init_db():
    _migrate_add_tidy_verdict()
    _migrate_add_doc_source_email_cols()
    _migrate_add_oauth_config()
+    _migrate_add_email_oauth_columns()
    _migrate_add_task_automation_columns()
    _migrate_add_disabled_tools()
    _migrate_add_mcp_oauth_tokens_column()
@@ -1622,9 +1811,106 @@ def init_db():
    _migrate_add_calendar_metadata()
    _migrate_add_calendar_is_utc()
    _migrate_add_calendar_origin()
+    _migrate_add_calendar_account_id()
+    _migrate_add_caldav_sync_columns()
+    _migrate_chat_messages_fts()
    _migrate_encrypt_email_passwords()
    _migrate_encrypt_signatures()
    _migrate_encrypt_endpoint_keys()
+    _migrate_backfill_task_folders()
+
+
+def _migrate_backfill_task_folders():
+    """Tag older task/research sessions with folder='Tasks'.
+
+    Only rows whose folder is NULL or empty and whose ``name`` starts with
+    '[Task] ' or '[Research] ' are updated, so re-running is harmless. Does
+    nothing when the sessions table has no ``folder`` column. Failures are
+    logged as warnings and swallowed.
+    """
+    log = logging.getLogger(__name__)
+    try:
+        with engine.connect() as conn:
+            session_cols = [row[1] for row in conn.execute(text("PRAGMA table_info(sessions)"))]
+            if "folder" not in session_cols:
+                return
+            backfill_sql = (
+                "UPDATE sessions SET folder = 'Tasks' "
+                "WHERE (folder IS NULL OR folder = '') "
+                "AND (name LIKE '[Task] %' OR name LIKE '[Research] %')"
+            )
+            result = conn.execute(text(backfill_sql))
+            conn.commit()
+            updated = result.rowcount
+            if updated:
+                log.info(
+                    f"Backfilled folder='Tasks' on {updated} task/research sessions")
+    except Exception as e:
+        log.warning(f"task folder backfill: {e}")
+
+
+def _migrate_chat_messages_fts():
+    """Create and backfill the session transcript FTS index for SQLite.
+
+    Steps, all on a direct sqlite3 connection to the file behind
+    ``DATABASE_URL``:
+
+    1. Probe for FTS5 by creating and dropping a temp virtual table; when the
+       probe fails the migration is skipped with a warning.
+    2. Create ``chat_messages_fts`` plus insert/delete/update triggers on
+       ``chat_messages`` (all ``IF NOT EXISTS``).
+    3. Insert every chat message that has no row in the index yet.
+
+    Skipped for non-SQLite URLs and for ``:memory:``. Errors are logged as
+    warnings and never raised; the connection is closed on every path.
+    """
+    # Imported locally, like the sibling migrations, so this function does not
+    # depend on a module-level sqlite3 import.
+    import sqlite3
+    if not DATABASE_URL.startswith("sqlite"):
+        return
+
+    db_path = DATABASE_URL.replace("sqlite:///", "")
+    if db_path == ":memory:":
+        return
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        try:
+            conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp._odysseus_fts5_probe USING fts5(content)")
+            conn.execute("DROP TABLE IF EXISTS temp._odysseus_fts5_probe")
+        except Exception as e:
+            logging.getLogger(__name__).warning(f"chat_messages FTS migration skipped; FTS5 unavailable: {e}")
+            return
+
+        conn.executescript(
+            """
+            CREATE VIRTUAL TABLE IF NOT EXISTS chat_messages_fts USING fts5(
+                content,
+                message_id UNINDEXED,
+                session_id UNINDEXED,
+                role UNINDEXED
+            );
+
+            CREATE TRIGGER IF NOT EXISTS chat_messages_fts_ai
+            AFTER INSERT ON chat_messages BEGIN
+                INSERT INTO chat_messages_fts(content, message_id, session_id, role)
+                VALUES (COALESCE(new.content, ''), new.id, new.session_id, new.role);
+            END;
+
+            CREATE TRIGGER IF NOT EXISTS chat_messages_fts_ad
+            AFTER DELETE ON chat_messages BEGIN
+                DELETE FROM chat_messages_fts WHERE message_id = old.id;
+            END;
+
+            CREATE TRIGGER IF NOT EXISTS chat_messages_fts_au
+            AFTER UPDATE ON chat_messages BEGIN
+                DELETE FROM chat_messages_fts WHERE message_id = old.id;
+                INSERT INTO chat_messages_fts(content, message_id, session_id, role)
+                VALUES (COALESCE(new.content, ''), new.id, new.session_id, new.role);
+            END;
+            """
+        )
+        # Backfill: index only messages that are not in the FTS table yet, so
+        # re-running the migration does not duplicate rows.
+        conn.execute(
+            """
+            INSERT INTO chat_messages_fts(content, message_id, session_id, role)
+            SELECT COALESCE(cm.content, ''), cm.id, cm.session_id, cm.role
+            FROM chat_messages cm
+            WHERE NOT EXISTS (
+                SELECT 1 FROM chat_messages_fts fts
+                WHERE fts.message_id = cm.id
+            )
+            """
+        )
+        conn.commit()
+    except Exception as e:
+        logging.getLogger(__name__).warning(f"chat_messages FTS migration failed: {e}")
+    finally:
+        # conn stays None when connect() itself failed; nothing to close then.
+        if conn is not None:
+            try:
+                conn.close()
+            except Exception:
+                pass


 def _migrate_add_email_smtp_security():
@@ -1633,6 +1919,7 @@ def _migrate_add_email_smtp_security():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(email_accounts)")
@@ -1648,9 +1935,13 @@ def _migrate_add_email_smtp_security():
            )
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass


 def _migrate_encrypt_endpoint_keys():
@@ -1751,6 +2042,7 @@ def _migrate_add_calendar_is_utc():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1759,9 +2051,13 @@ def _migrate_add_calendar_is_utc():
            conn.execute("ALTER TABLE calendar_events ADD COLUMN is_utc BOOLEAN DEFAULT 0 NOT NULL")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'is_utc' column to calendar_events")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"is_utc migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass


 def _migrate_add_calendar_origin():
@@ -1772,6 +2068,7 @@ def _migrate_add_calendar_origin():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1781,9 +2078,64 @@ def _migrate_add_calendar_origin():
            conn.execute("CREATE INDEX IF NOT EXISTS ix_calendar_events_origin ON calendar_events(origin)")
            conn.commit()
            logging.getLogger(__name__).info("Migrated: added 'origin' column to calendar_events")
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"calendar_events.origin migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+
+def _migrate_add_calendar_account_id():
+    """Give the calendars table an indexed `account_id` column if it lacks one.
+
+    The column records which credential set (from caldav_accounts in user
+    prefs) owns a CalDAV-backed calendar. Safe to run repeatedly; does nothing
+    when the database file or the calendars table is absent.
+    """
+    import sqlite3
+    db_path = DATABASE_URL.replace("sqlite:///", "")
+    if not os.path.exists(db_path):
+        return
+    log = logging.getLogger(__name__)
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        existing = [row[1] for row in conn.execute("PRAGMA table_info(calendars)").fetchall()]
+        # An empty list means the table does not exist yet.
+        needs_column = bool(existing) and "account_id" not in existing
+        if needs_column:
+            conn.execute("ALTER TABLE calendars ADD COLUMN account_id TEXT")
+            conn.execute("CREATE INDEX IF NOT EXISTS ix_calendars_account_id ON calendars(account_id)")
+            conn.commit()
+            log.info("Migrated: added 'account_id' column to calendars")
+    except Exception as e:
+        log.warning(f"calendars.account_id migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass
+
+
+def _migrate_add_caldav_sync_columns():
+    """Add remote CalDAV metadata used for bidirectional sync.
+
+    Adds ``remote_href``, ``remote_etag`` and ``caldav_sync_pending`` to
+    ``calendar_events`` and ``caldav_base_url`` to ``calendars`` when they are
+    missing. Idempotent; a table that does not exist yet is skipped. Failures
+    are logged as warnings and never raised, and the connection is closed on
+    every path (including when a statement raises).
+    """
+    import sqlite3
+    db_path = DATABASE_URL.replace("sqlite:///", "")
+    if not os.path.exists(db_path):
+        return
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        ev_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendar_events)").fetchall()]
+        # An empty column list means the table does not exist; skip it.
+        if ev_columns:
+            for column in ("remote_href", "remote_etag", "caldav_sync_pending"):
+                if column not in ev_columns:
+                    conn.execute(f"ALTER TABLE calendar_events ADD COLUMN {column} TEXT")
+
+        cal_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendars)").fetchall()]
+        if cal_columns and "caldav_base_url" not in cal_columns:
+            conn.execute("ALTER TABLE calendars ADD COLUMN caldav_base_url TEXT")
+        conn.commit()
+    except Exception as e:
+        logging.getLogger(__name__).warning(f"CalDAV sync metadata migration failed: {e}")
+    finally:
+        # Previously close() only ran on the success path, leaking the
+        # connection whenever a PRAGMA/ALTER raised.
+        if conn is not None:
+            try:
+                conn.close()
+            except Exception:
+                pass


 def _migrate_add_calendar_metadata():
@@ -1792,6 +2144,7 @@ def _migrate_add_calendar_metadata():
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
+    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1803,9 +2156,13 @@ def _migrate_add_calendar_metadata():
        if columns and "last_pinged" not in columns:
            conn.execute("ALTER TABLE calendar_events ADD COLUMN last_pinged DATETIME")
        conn.commit()
-        conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"calendar_events migration failed: {e}")
+    finally:
+        try:
+            conn.close()
+        except Exception:
+            pass

 def get_db():
    """
@@ -17,6 +17,15 @@ INTERNAL_TOOL_TOKEN = os.environ.get("ODYSSEUS_INTERNAL_TOKEN") or secrets.token
 INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token"


+def is_cors_preflight(method: str, headers) -> bool:
+    """Tell whether a request is a genuine CORS preflight.
+
+    A preflight is an OPTIONS request that carries the
+    Access-Control-Request-Method header. These requests never include
+    credentials and have to reach CORSMiddleware to get an answer; putting
+    them behind auth returns 401 and breaks every cross-origin browser/WebView
+    client. The function is pure, so it can be unit-tested without the app.
+    """
+    if method != "OPTIONS":
+        return False
+    return "access-control-request-method" in headers
+
+
 def require_admin(request: Request):
    """Raise 403 if the current user isn't an admin.
    Allows access when auth is explicitly disabled, or when the request carries
@@ -58,11 +67,22 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):

        # Tool render endpoints are served inside iframes — allow framing by self
        is_tool_render = path.startswith("/api/tools/") and path.endswith("/render")
+        # PDF previews are embedded by the in-app document library. Keep the
+        # exception route-scoped so normal app pages remain unframeable.
+        is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf")
        # Visual report pages are self-contained HTML — need inline scripts + external images
        is_report = path.startswith("/api/research/report/")

        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["Referrer-Policy"] = "no-referrer"
+        response.headers["Permissions-Policy"] = "camera=(), microphone=(self), geolocation=()"
+
+        is_https = (
+            request.url.scheme == "https"
+            or request.headers.get("X-Forwarded-Proto") == "https"
+        )
+        if is_https:
+            response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"

        if is_report:
            response.headers["Content-Security-Policy"] = (
@@ -79,6 +99,12 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
            # sandbox="allow-scripts" attribute provides isolation.
            # Don't overwrite the route's own restrictive CSP either.
            pass
+        elif is_document_pdf_preview:
+            response.headers["X-Frame-Options"] = "SAMEORIGIN"
+            response.headers["Content-Security-Policy"] = (
+                "default-src 'none'; "
+                "frame-ancestors 'self'"
+            )
        else:
            response.headers["X-Frame-Options"] = "DENY"
            # NOTE: `style-src 'unsafe-inline'` is intentionally retained.
@@ -11,14 +11,24 @@ from typing import Dict, List, Any, Optional, TYPE_CHECKING
 if TYPE_CHECKING:
    from .session_manager import SessionManager

-# Module-level session manager reference (set at app startup)
-_session_manager: Optional["SessionManager"] = None
+# Module-level session manager singleton (single source of truth)
+_SESSION_MANAGER_INSTANCE: Optional["SessionManager"] = None


-def set_session_manager(manager: "SessionManager"):
-    """Set the global session manager reference."""
-    global _session_manager
-    _session_manager = manager
+def set_session_manager_instance(manager: "SessionManager"):
+    """Set the global SessionManager singleton.
+
+    Rebinds the module-level ``_SESSION_MANAGER_INSTANCE``; any previously
+    stored manager is simply replaced.
+    """
+    global _SESSION_MANAGER_INSTANCE
+    _SESSION_MANAGER_INSTANCE = manager
+
+
+def get_session_manager_instance() -> Optional["SessionManager"]:
+    """Get the global SessionManager singleton.
+
+    Returns whatever is currently bound to the module-level
+    ``_SESSION_MANAGER_INSTANCE``, which starts out as ``None``.
+    """
+    return _SESSION_MANAGER_INSTANCE
+
+
+# Keep legacy name for backward compatibility
+set_session_manager = set_session_manager_instance
+get_session_manager = get_session_manager_instance


@dataclass
@@ -42,7 +52,17 @@ class ChatMessage:

@dataclass
 class Session:
-    """A chat session — pure data container."""
+    """A chat session — pure data container.
+
+    ``.history`` is the authoritative mutable message list. Callers may
+    read, append, pop, or reassign it directly — these changes take
+    effect immediately. ``_history`` remains a compatibility alias that
+    always resolves to the authoritative ``history`` list.
+
+    Each session gets its own unique history list at construction time
+    (the dataclass default is never shared between instances).
+    """
+
    id: str
    name: str
    endpoint_url: str
@@ -56,24 +76,35 @@ class Session:
    message_count: int = 0

    def __post_init__(self):
-        if self.history is None:
-            self.history = []
        if self.headers is None:
            self.headers = {}
+        # Ensure each session gets its OWN list (not the shared dataclass default)
+        if self.history is None:
+            self.history = []
+
+    @property
+    def _history(self) -> List[ChatMessage]:
+        """Compatibility alias for callers that still reference ``_history``.
+
+        Returns the very same list object as ``self.history`` (no copy), so
+        in-place mutations through either name are visible through both.
+        """
+        return self.history
+
+    @_history.setter
+    def _history(self, messages: List[ChatMessage]):
+        """Rebind ``self.history`` to ``messages`` (the list is not copied)."""
+        self.history = messages

    def add_message(self, message: ChatMessage):
        """
        Add a message to this session.

-        Delegates to SessionManager for persistence if available,
-        otherwise just appends to history.
+        Appends to the authoritative history list and increments
+        message_count. Delegates to SessionManager for persistence
+        if available.
        """
        self.history.append(message)
        self.message_count = len(self.history)

        # Delegate to session manager for persistence
-        if _session_manager:
-            _session_manager._persist_message(self.id, message)
+        if _SESSION_MANAGER_INSTANCE:
+            _SESSION_MANAGER_INSTANCE._persist_message(self.id, message)

    def get_context_messages(self) -> List[Dict[str, Any]]:
        """Get messages in format for LLM API.
@@ -94,3 +125,7 @@ class Session:
    def get(self, key: str, default=None):
        """Dict-like access for compatibility."""
        return getattr(self, key, default)
+
+    def __getitem__(self, key: str):
+        """Allow session['field'] syntax.
+
+        Looks the key up as an attribute. Unlike a dict, a missing field
+        raises ``AttributeError`` (from ``getattr``), not ``KeyError``.
+        """
+        return getattr(self, key)
@@ -18,10 +18,22 @@ import ntpath
 import shutil
 import subprocess
 from pathlib import Path
+import sys
 from typing import List, Optional
+import platform

 IS_WINDOWS = os.name == "nt"
 IS_POSIX = not IS_WINDOWS
+# Allows APFEL support and ARM-native binary recommendations on Apple Silicon Macs.
+IS_APPLE_SILICON = (
+    IS_POSIX
+    and platform.system() == "Darwin"
+    and platform.machine().lower()
+    in {
+        "arm64",
+        "aarch64",
+    }
+)


 # ── File permissions ────────────────────────────────────────────────────────
@@ -53,9 +65,8 @@ def detached_popen_kwargs() -> dict:
    and is detached from any console.
    """
    if IS_WINDOWS:
-        flags = (
-            getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200)
-            | getattr(subprocess, "DETACHED_PROCESS", 0x00000008)
+        flags = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) | getattr(
+            subprocess, "DETACHED_PROCESS", 0x00000008
        )
        return {"creationflags": flags}
    return {"start_new_session": True}
@@ -150,6 +161,29 @@ _WINDOWS_BASH_RELATIVE_PATHS = (
    ("usr", "bin", "bash.exe"),
 )

+# Paths to add to the remote SSH probe command to find tools like nvidia-smi that may not be on PATH.
+_SSH_PATH_MEMBERS = (
+    "/usr/bin",
+    "/usr/local/bin",
+    "/usr/local/cuda/bin",
+    "/usr/lib/wsl/lib"
+)
+# Fallback locations for nvidia-smi on WSL and other Linux distros where it may not be on PATH.
+NVIDIA_PATH_CANDIDATES = (
+    "/usr/bin/nvidia-smi",
+    "/usr/local/bin/nvidia-smi",
+    "/usr/local/cuda/bin/nvidia-smi",
+    "/usr/lib/wsl/lib/nvidia-smi",
+)
+
+
+def _ssh_path_override() -> str:
+    """Return the shell snippet that appends the extra probe dirs to PATH.
+
+    The snippet ends with ``; `` so a remote command can be concatenated
+    directly after it.
+    """
+    extra_dirs = ":".join(_SSH_PATH_MEMBERS)
+    return 'export PATH="$PATH:' + extra_dirs + '"; '
+
+
+SSH_PATH_OVERRIDE = _ssh_path_override()
+

 def _windows_bash_fallbacks() -> List[str]:
    roots: List[str] = []
@@ -157,6 +191,8 @@ def _windows_bash_fallbacks() -> List[str]:
        base = os.environ.get(env_name)
        if base:
            roots.append(ntpath.join(base, "Git"))
+            if env_name == "LocalAppData":
+                roots.append(ntpath.join(base, "Programs", "Git"))
    roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)

    paths: List[str] = []
@@ -180,6 +216,21 @@ def _is_windows_bash_stub(path: str) -> bool:
    )


+def git_bash_path(path: str | Path) -> str:
+    """Render a path in the POSIX form Git Bash expects on Windows.
+
+    The path is converted to forward slashes. On Windows a leading drive
+    letter is additionally rewritten, e.g. 'C:\\path' becomes '/c/path'.
+    On other platforms only the forward-slash form is returned.
+    """
+    posix_form = Path(path).as_posix()
+    has_drive = len(posix_form) >= 2 and posix_form[1] == ":"
+    if not (IS_WINDOWS and has_drive):
+        return posix_form
+    # Drop the colon and lower-case the drive letter: "C:/x" -> "/c/x".
+    return "/" + posix_form[0].lower() + posix_form[2:]
+
+
+
 def find_bash() -> Optional[str]:
    """Locate a real ``bash`` interpreter, or None.

@@ -242,3 +293,160 @@ def run_script_argv(script_path) -> List[str]:
        comspec = os.environ.get("ComSpec", "cmd.exe")
        return [comspec, "/c", str(script_path)]
    return ["sh", str(script_path)]
+
+
+def is_wsl() -> bool:
+    """Report whether this process runs inside Windows Subsystem for Linux.
+
+    On Linux/POSIX hosts, /proc/version is read and searched
+    (case-insensitively) for "microsoft". Any read error counts as "not WSL".
+    """
+    import sys
+    on_unix = sys.platform.startswith("linux") or os.name == "posix"
+    if not on_unix:
+        return False
+    try:
+        with open("/proc/version", "r", encoding="utf-8", errors="ignore") as f:
+            kernel_banner = f.read().lower()
+    except Exception:
+        return False
+    return "microsoft" in kernel_banner
+
+
+def translate_path(path_str: str) -> str:
+    """Convert a path, possibly Windows-style, to the current OS format.
+
+    Under WSL, backslashes become forward slashes and a drive-letter path
+    (C:\\foo or C:/foo) is mapped to /mnt/c/foo. Everything else is resolved
+    through ``Path.resolve()``; if that fails the string is returned as is.
+    Empty input is returned unchanged.
+    """
+    if not path_str:
+        return path_str
+
+    if is_wsl():
+        import re
+        path_str = path_str.replace("\\", "/")
+        drive_match = re.match(r"^([a-zA-Z]):(.*)", path_str)
+        if drive_match:
+            letter, tail = drive_match.groups()
+            # "C:foo" has no separator after the colon; add one.
+            if not tail.startswith("/"):
+                tail = "/" + tail
+            return f"/mnt/{letter.lower()}{tail}"
+
+    try:
+        resolved = Path(path_str).resolve()
+    except Exception:
+        return path_str
+    return str(resolved)
+
+
+def get_wsl_windows_user_profile() -> Optional[str]:
+    """Find the Windows host's user profile directory from inside WSL.
+
+    First asks PowerShell for $env:USERPROFILE and translates the answer to a
+    WSL path. If that fails, falls back to the first directory under
+    /mnt/c/Users that is not a well-known system entry. Returns None outside
+    WSL or when nothing is found.
+    """
+    if not is_wsl():
+        return None
+
+    try:
+        result = run_wsl_windows_powershell("Write-Output $env:USERPROFILE", timeout=5)
+        if result.returncode == 0:
+            profile = result.stdout.strip()
+            if profile:
+                return translate_path(profile)
+    except Exception:
+        pass
+
+    system_entries = ("All Users", "Default", "Default User", "desktop.ini", "Public")
+    try:
+        users_dir = "/mnt/c/Users"
+        if os.path.isdir(users_dir):
+            for entry in os.listdir(users_dir):
+                if entry in system_entries:
+                    continue
+                candidate = os.path.join(users_dir, entry)
+                if os.path.isdir(candidate):
+                    return candidate
+    except Exception:
+        pass
+    return None
+
+
+def _ssh_exec_argv(
+    remote: str,
+    ssh_port: str | None,
+    *,
+    remote_cmd: str | None = None,
+    connect_timeout: int | None = None,
+    strict_host_key_checking: bool | None = None,
+) -> list[str]:
+    """Build a consistent ssh argv for remote command execution.
+
+    Args:
+        remote: ``host`` or ``user@host``. Surrounding whitespace is ignored.
+        ssh_port: Port to pass with ``-p``; omitted when empty or ``"22"``.
+        remote_cmd: Command string appended after the remote, if given.
+        connect_timeout: Value for ``-o ConnectTimeout=...``, if given.
+        strict_host_key_checking: ``True``/``False`` map to
+            ``StrictHostKeyChecking=yes``/``no``; ``None`` leaves it unset.
+
+    Returns:
+        The argv list, starting with ``"ssh"``.
+
+    Raises:
+        ValueError: If the remote is empty, or the remote or its host part
+            starts with ``-`` (which ssh would parse as an option).
+    """
+    remote_value = str(remote or "").strip()
+    remote_host = remote_value.rsplit("@", 1)[-1]
+    if not remote_value or remote_value.startswith("-") or not remote_host or remote_host.startswith("-"):
+        raise ValueError("Invalid SSH remote host")
+    argv = ["ssh"]
+    if connect_timeout is not None:
+        argv.extend(["-o", f"ConnectTimeout={int(connect_timeout)}"])
+    if strict_host_key_checking is not None:
+        mode = "yes" if strict_host_key_checking else "no"
+        argv.extend(["-o", f"StrictHostKeyChecking={mode}"])
+    if ssh_port and ssh_port != "22":
+        argv.extend(["-p", str(ssh_port)])
+    # Pass on exactly the value that was validated above, not the raw input,
+    # so stray whitespace or a non-str object never reaches ssh.
+    argv.append(remote_value)
+    if remote_cmd is not None:
+        argv.append(remote_cmd)
+    return argv
+
+
+def run_ssh_command(
+    remote: str,
+    ssh_port: str | None,
+    remote_cmd: str,
+    *,
+    timeout: float,
+    connect_timeout: int | None = None,
+    strict_host_key_checking: bool | None = None,
+    text: bool = True,
+) -> subprocess.CompletedProcess:
+    """Execute ``remote_cmd`` over ssh, capturing stdout and stderr.
+
+    The argv comes from ``_ssh_exec_argv``; ``timeout`` bounds the whole
+    subprocess call and ``text`` selects str versus bytes output.
+    """
+    argv = _ssh_exec_argv(
+        remote,
+        ssh_port,
+        remote_cmd=remote_cmd,
+        connect_timeout=connect_timeout,
+        strict_host_key_checking=strict_host_key_checking,
+    )
+    return subprocess.run(argv, capture_output=True, text=text, timeout=timeout)
+
+
+def _windows_powershell_argv(
+    command: str,
+    *,
+    no_profile: bool = True,
+    non_interactive: bool = True,
+) -> List[str]:
+    """Assemble the powershell.exe argv that runs ``command`` via -Command.
+
+    ``-NoProfile`` and ``-NonInteractive`` are included unless switched off.
+    """
+    switches: List[str] = []
+    if no_profile:
+        switches.append("-NoProfile")
+    if non_interactive:
+        switches.append("-NonInteractive")
+    return ["powershell.exe", *switches, "-Command", command]
+
+
+def run_wsl_windows_powershell(
+    command: str,
+    *,
+    timeout: float = 5,
+) -> subprocess.CompletedProcess[str]:
+    """Execute a PowerShell command on the Windows host from within WSL.
+
+    Output is captured as text and the call is bounded by ``timeout`` seconds.
+
+    Raises ``RuntimeError`` when called outside WSL.
+    """
+    if is_wsl():
+        argv = _windows_powershell_argv(command)
+        return subprocess.run(argv, timeout=timeout, text=True, capture_output=True)
+    raise RuntimeError("run_wsl_windows_powershell is only supported in WSL")
@@ -14,9 +14,12 @@ import logging
 from datetime import datetime, timezone, timedelta
 from typing import Dict, Optional

-from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal
+from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal, utcnow_naive
 from .models import Session, ChatMessage

+# Re-export singleton accessors from models for convenience
+from .models import set_session_manager_instance, get_session_manager_instance
+
 logger = logging.getLogger(__name__)


@@ -188,12 +191,17 @@ class SessionManager:
        """
        Add a message to a session and persist to database.

+        Updates the authoritative history list and persists through this
+        manager directly so tests and temporary managers do not depend on the
+        process-wide session-manager singleton.
+
        Args:
            session_id: Session ID
            message: ChatMessage to add
        """
        session = self.get_session(session_id)
        session.history.append(message)
+        session._history = session.history
        session.message_count = len(session.history)

        self._persist_message(session_id, message)
@@ -232,7 +240,10 @@ class SessionManager:
            )
            db.add(db_message)

-            db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
+            if session_id in self.sessions:
+                db_session.message_count = len(self.sessions[session_id].history)
+            else:
+                db_session.message_count = 0
            _now = datetime.now(timezone.utc)
            db_session.last_accessed = _now
            # Clean "last conversation" timestamp — only bumped here on a
@@ -283,6 +294,7 @@ class SessionManager:

            # Update in-memory
            session.history = session.history[:keep_count]
+            session._history = session.history

            logger.info(f"Truncated session {session_id} to {keep_count} messages")
            return True
@@ -333,6 +345,7 @@ class SessionManager:

            db.commit()
            session.history = list(messages)
+            session._history = session.history
            session.message_count = len(messages)
            logger.info("Replaced session %s history with %d messages", session_id, len(messages))
            return True
@@ -608,24 +621,52 @@ class SessionManager:
    def save_sessions(self):
        """No-op for DB compatibility."""

+    def ensure_task_session(self, session_id: str, name: str, endpoint_url: str, model: str, owner: str = None, task: object = None) -> Session:
+        """Create a task session if it doesn't exist, or return the existing one.
+
+        Unlike create_session, this checks the cache first and does NOT
+        overwrite an existing in-memory session. The task scheduler must
+        use this instead of direct dict assignment.
+
+        Args:
+            session_id: Key looked up in ``self.sessions``.
+            name: Passed to ``create_session`` when a session is created.
+            endpoint_url: Passed to ``create_session`` when a session is created.
+            model: Passed to ``create_session`` when a session is created.
+            owner: Passed to ``create_session`` as ``owner``.
+            task: Optional object whose ``session_id`` attribute is set to
+                ``session_id``, but only when a new session is created.
+
+        Returns:
+            The cached session if present, otherwise the newly created one.
+        """
+        if session_id in self.sessions:
+            # Early return: ``task.session_id`` is left untouched on this path.
+            return self.sessions[session_id]
+
+        session = self.create_session(session_id, name, endpoint_url, model, owner=owner)
+        if task is not None:
+            task.session_id = session_id
+        return session
+
    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

-    def cleanup_empty_sessions(self, auto_archive_days: int = 30) -> dict:
-        """Clean up empty and old sessions."""
+    def cleanup_empty_sessions(self, auto_archive_days: int = 30, min_age_hours: int = 1) -> dict:
+        """Clean up empty and old sessions.
+
+        Args:
+            auto_archive_days: Age in days before non-important sessions are archived.
+            min_age_hours: Minimum age in hours before an empty session can be deleted.
+                          Prevents deleting sessions that were just created.
+        """
        db = SessionLocal()
        stats = {'deleted_empty': 0, 'archived_old': 0, 'total_checked': 0}

        try:
            all_sessions = db.query(DbSession).all()
-            cutoff_date = datetime.now(timezone.utc) - timedelta(days=auto_archive_days)
+            cutoff_date = utcnow_naive() - timedelta(days=auto_archive_days)
+            min_age = utcnow_naive() - timedelta(hours=min_age_hours)

            for db_session in all_sessions:
                stats['total_checked'] += 1

-                # Delete empty sessions
+                # Delete empty sessions only if older than min_age_hours
                if db_session.message_count == 0:
+                    if db_session.created_at is not None:
+                        created = db_session.created_at
+                        if created.tzinfo is None:
+                            created = created.replace(tzinfo=timezone.utc)
+                        if created > min_age:
+                            continue  # Too young to delete
                    if db_session.id in self.sessions:
                        del self.sessions[db_session.id]
                    db.delete(db_session)
@@ -16,18 +16,18 @@ services:
    ports:
      - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
    volumes:
-      - ./data:/app/data:z
-      - ./logs:/app/logs:z
+      - ${APP_DATA_DIR:-./data}:/app/data:z
+      - ${APP_LOGS_DIR:-./logs}:/app/logs:z
      # Cookbook remote-server SSH identity. Odysseus can generate a key here;
      # add the shown public key to each remote server's authorized_keys.
-      - ./data/ssh:/app/.ssh:z
+      - ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
      # Cookbook local model cache. Inside Docker, "Local" means the Odysseus
      # container, so persist its HuggingFace cache under ./data/huggingface.
-      - ./data/huggingface:/app/.cache/huggingface:z
+      - ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
      # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
-      - ./data/local:/app/.local:z
+      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -52,12 +52,14 @@ services:
      - SECURE_COOKIES=${SECURE_COOKIES:-false}
      - EMBEDDING_URL=${EMBEDDING_URL:-}
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-}
+      - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-}
      - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
      - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}
      - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24}
      - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1}
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
+      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -15,18 +15,18 @@ services:
    ports:
      - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
    volumes:
-      - ./data:/app/data:z
-      - ./logs:/app/logs:z
+      - ${APP_DATA_DIR:-./data}:/app/data:z
+      - ${APP_LOGS_DIR:-./logs}:/app/logs:z
      # Cookbook remote-server SSH identity. Odysseus can generate a key here;
      # add the shown public key to each remote server's authorized_keys.
-      - ./data/ssh:/app/.ssh:z
+      - ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
      # Cookbook local model cache. Inside Docker, "Local" means the Odysseus
      # container, so persist its HuggingFace cache under ./data/huggingface.
-      - ./data/huggingface:/app/.cache/huggingface:z
+      - ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
      # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
-      - ./data/local:/app/.local:z
+      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -51,12 +51,14 @@ services:
      - SECURE_COOKIES=${SECURE_COOKIES:-false}
      - EMBEDDING_URL=${EMBEDDING_URL:-}
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-}
+      - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-}
      - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
      - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}
      - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24}
      - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1}
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
+      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -4,18 +4,18 @@ services:
    ports:
      - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
    volumes:
-      - ./data:/app/data:z
-      - ./logs:/app/logs:z
+      - ${APP_DATA_DIR:-./data}:/app/data:z
+      - ${APP_LOGS_DIR:-./logs}:/app/logs:z
      # Cookbook remote-server SSH identity. Odysseus can generate a key here;
      # add the shown public key to each remote server's authorized_keys.
-      - ./data/ssh:/app/.ssh:z
+      - ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
      # Cookbook local model cache. Inside Docker, "Local" means the Odysseus
      # container, so persist its HuggingFace cache under ./data/huggingface.
-      - ./data/huggingface:/app/.cache/huggingface:z
+      - ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
      # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
-      - ./data/local:/app/.local:z
+      - ${APP_DATA_DIR:-./data}/local:/app/.local:z
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
@@ -40,12 +40,14 @@ services:
      - SECURE_COOKIES=${SECURE_COOKIES:-false}
      - EMBEDDING_URL=${EMBEDDING_URL:-}
      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-}
+      - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-}
      - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
      - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}
      - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24}
      - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1}
      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
+      - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
@@ -0,0 +1,194 @@
+# Agent migration manifests
+
+Odysseus should be able to learn from another agent without blindly trusting
+that agent's whole state. The safe migration path is:
+
+```text
+source agent export -> source adapter -> agent-migration.v1 manifest -> preview -> apply
+```
+
+The manifest is intentionally source-neutral. OpenClaw, Hermes, a folder of
+Markdown notes, or any other agent can have its own adapter, but Odysseus only
+needs to understand the normalized manifest.
+
+## Why not import everything as memory?
+
+Durable memory should stay compact and useful. Long notes, logs, session
+transcripts, and project archives are useful context, but they are not all
+memories. A good migration keeps two layers separate:
+
+- **Archive documents** preserve source material for search, reading, and later
+  extraction.
+- **Memory candidates** are short facts or preferences that can be reviewed
+  before being saved into Odysseus memory.
+
+This keeps Odysseus' existing memory-review flow intact while giving it better
+source material to review.
+
+## Manifest shape
+
+`agent-migration.v1` is a JSON object:
+
+```json
+{
+  "schema_version": "agent-migration.v1",
+  "generated_at": "2026-06-06T00:00:00Z",
+  "source": {
+    "name": "example-agent",
+    "kind": "generic"
+  },
+  "summary": {
+    "item_count": 4,
+    "counts_by_kind": {
+      "memory": 1,
+      "skill": 1,
+      "conversation_thread": 1,
+      "archive_document": 1
+    },
+    "warning_count": 0
+  },
+  "items": [],
+  "warnings": []
+}
+```
+
+Each item has a stable `id`, a `kind`, source metadata, and enough content for a
+future importer to preview it before applying.
+
+Supported item kinds in the first pass:
+
+- `memory` — a candidate memory with `text`, `category`, `source`, and
+  provenance metadata.
+- `skill` — a `SKILL.md` file with content and parsed frontmatter metadata.
+- `conversation_thread` — a normalized transcript thread from an exported chat
+  history. Message content is optional; adapters can preserve only thread
+  metadata, message counts, timestamps, and hashes when a manifest should stay
+  small or avoid embedding private transcript text.
+- `archive_document` — long-form source material. Content is optional; adapters
+  can preserve only path/hash/size metadata when a manifest should stay small.
+
+## Build a manifest
+
+Use the read-only helper:
+
+```bash
+python3 scripts/agent_migration_manifest.py \
+  --source-name old-agent \
+  --source-kind generic \
+  --memory-json /path/to/memories.json \
+  --skills-dir /path/to/skills \
+  --conversation-json /path/to/conversations.json \
+  --archive /path/to/notes \
+  --output /tmp/agent-migration.json
+```
+
+The helper does not write to `data/`, call an LLM, import Odysseus modules, or
+modify the source. It only writes JSON.
+
+Memory JSON may be:
+
+```json
+[
+  "A plain memory string",
+  {
+    "text": "A categorized memory",
+    "category": "preference",
+    "source": "old-agent"
+  }
+]
+```
+
+or an object containing a list under `memories`, `memory`, `items`, or `data`.
+
+Skills are scanned recursively for `SKILL.md`:
+
+```bash
+python3 scripts/agent_migration_manifest.py \
+  --source-name hermes \
+  --source-kind hermes \
+  --skills-dir ~/.hermes/skills \
+  --output /tmp/hermes-skills-manifest.json
+```
+
+Archive documents are metadata-only by default. To embed text content:
+
+```bash
+python3 scripts/agent_migration_manifest.py \
+  --source-name notes-export \
+  --archive /path/to/markdown-notes \
+  --include-archive-content \
+  --output /tmp/notes-manifest.json
+```
+
+Conversation exports are also metadata-only by default:
+
+```bash
+python3 scripts/agent_migration_manifest.py \
+  --source-name chatgpt-export \
+  --source-kind chatgpt \
+  --conversation-json /path/to/conversations.json \
+  --output /tmp/chatgpt-conversations-manifest.json
+```
+
+The first pass supports generic conversation JSON such as:
+
+```json
+[
+  {
+    "id": "thread-1",
+    "title": "Project plan",
+    "messages": [
+      {"role": "user", "content": "Can we design this?"},
+      {"role": "assistant", "content": "Yes, start with a narrow slice."}
+    ]
+  }
+]
+```
+
+It also recognizes ChatGPT-style `mapping` exports from `conversations.json`.
+To embed normalized messages:
+
+```bash
+python3 scripts/agent_migration_manifest.py \
+  --source-name chatgpt-export \
+  --source-kind chatgpt \
+  --conversation-json /path/to/conversations.json \
+  --include-conversation-content \
+  --max-conversation-messages 2000 \
+  --output /tmp/chatgpt-conversations-with-content.json
+```
+
+Content embedding is explicit because exported chat histories can be huge and
+private. A future source-specific adapter can add ZIP traversal, attachment
+metadata, and provider-specific project/workspace fields while still emitting
+the same `conversation_thread` manifest item.
+
+## Recommended apply behavior
+
+A future Odysseus importer should treat the manifest as untrusted user-provided
+data and apply it in stages:
+
+1. Show a dry-run summary with counts, warnings, duplicates, and sample items.
+2. Back up current `data/` state before writing anything.
+3. Import archive documents as documents or another searchable source, not as
+   memory.
+4. Import conversation threads as searchable archived context first, with
+   citations back to the source thread. Do not turn whole transcripts into
+   memory.
+5. Show memory candidates for review before saving through the normal memory
+   path.
+6. Import skills only after name/category conflict checks.
+7. Skip secrets by default. Credentials need explicit, provider-specific flows.
+
+## What belongs in source adapters?
+
+Adapters can be source-specific. The core manifest should not be.
+
+For example, an OpenClaw adapter may know about OpenClaw's workspace files. A
+Hermes adapter may know about `~/.hermes/config.yaml` and `~/.hermes/skills`.
+A ChatGPT adapter may know about `conversations.json`, uploaded-file metadata,
+and image attachment directories. A Claude adapter may know about Claude's
+export shape and project boundaries. A generic adapter may only know about
+memory JSON, conversation JSON, `SKILL.md`, and Markdown folders.
+
+Nonstandard folders should be adapter details, not required Odysseus concepts.
@@ -0,0 +1,129 @@
+# Backup & Restore
+
+Odysseus keeps all of your state in the `data/` directory — the SQLite database
+(`app.db`), the Fernet encryption key (`data/.app_key`), the vault, memory, RAG
+indexes, personal documents, and uploads. The `scripts/odysseus-backup` tool
+snapshots that directory into a single gzip tarball and restores it later.
+
+Snapshots are safe to take while the app is running: SQLite databases are copied
+through SQLite's own `.backup` API rather than a raw file copy, so an in-flight
+write can't corrupt the snapshot.
+
+> **A snapshot contains your secrets.** The tarball includes the Fernet
+> encryption key (`data/.app_key`), the vault, sessions, and any stored
+> provider/API tokens — so treat it like a password. Store backups somewhere
+> private, never commit them to Git, and prefer an encrypted destination when
+> copying them offsite.
+
+## Quick start
+
+Run the tool from the repository root:
+
+```bash
+# Create a snapshot → backups/odysseus-backup-<YYYYMMDD-HHMMSS>.tar.gz
+./scripts/odysseus-backup snapshot
+
+# List existing snapshots (most recent first)
+./scripts/odysseus-backup list
+
+# Check a tarball's integrity without extracting it
+./scripts/odysseus-backup verify backups/odysseus-backup-20260101-120000.tar.gz
+
+# Restore (destructive — see the warning below)
+./scripts/odysseus-backup restore backups/odysseus-backup-20260101-120000.tar.gz --yes
+```
+
+The script depends only on the Python standard library, so any `python3` on your
+`PATH` will run it — you don't need the app's virtualenv active.
+
+Every command prints a JSON result. Add `--pretty` for indented output.
+
+## Commands
+
+### `snapshot`
+
+Writes a `tar.gz` of `data/` to `backups/odysseus-backup-<timestamp>.tar.gz`.
+
+| Flag | Effect |
+| --- | --- |
+| `--out PATH` | Write to a specific path instead of the default `backups/` location. Must be **outside** `data/`. |
+| `--include-research` | Include `data/deep_research/` (skipped by default — research runs are large). |
+| `--include-attachments` | Include `data/mail-attachments/` (skipped by default — cached IMAP extractions, re-derivable). |
+
+By default the snapshot includes everything under `data/` **except**
+`deep_research/` and `mail-attachments/`. Personal uploads and documents are
+included.
+
+```bash
+# Snapshot straight to a mounted NAS path
+./scripts/odysseus-backup snapshot --out /mnt/nas/odysseus-$(date +%F).tar.gz
+
+# Full snapshot including research runs and mail attachments
+./scripts/odysseus-backup snapshot --include-research --include-attachments
+```
+
+### `list`
+
+Lists the tarballs in `backups/`, most recent first, with size and modification
+time.
+
+### `verify PATH`
+
+Opens the tarball read-only and walks every member to confirm it is intact and
+safe to restore. Nothing is extracted. Use this before relying on an old backup
+or after copying one across machines.
+
+### `restore PATH --yes`
+
+Overwrites `data/` from a tarball.
+
+> **Restore is destructive.** It replaces the current `data/` directory. `--yes`
+> is required so a mistyped command can't wipe your live state.
+
+Restore is not a blind delete: before extracting, the tool **renames your current
+`data/` to `data.before-restore-<timestamp>`** in the repository root. If a
+restore turns out to be wrong, your previous state is still there — delete the
+restored `data/` and rename the stashed directory back. The restore path is also
+validated entry-by-entry: archives containing absolute paths, `..` segments,
+symlinks, or anything outside `data/` are rejected.
+
+## Scheduling offsite backups
+
+The tarball output composes cleanly with cron and any copy tool. For example, a
+nightly snapshot copied offsite:
+
+```cron
+0 3 * * *  cd /path/to/odysseus && ./scripts/odysseus-backup snapshot --out "/mnt/nas/odysseus-$(date +\%F).tar.gz"
+```
+
+Swap the `--out` target for `scp`, `rclone`, `s3cmd`, or similar to push the
+snapshot to remote storage.
+
+## Docker vs native installs
+
+The tool reads `data/` and writes `backups/` relative to the repository root, so
+where you run it matters:
+
+- **Native installs** — run it from the repo root as shown above. `data/` and
+  `backups/` are both in the repo directory.
+- **Docker** — `docker-compose.yml` bind-mounts the host's `./data` to
+  `/app/data`, so the live data is also present on the host. **Run the tool on
+  the host** from the repo root; the snapshot reads the bind-mounted `./data` and
+  writes to `./backups` on the host. Running it *inside* the container is not
+  recommended, because `backups/` is not a mounted volume and the tarball would
+  be lost when the container is recreated.
+
+> **ChromaDB caveat (Docker only).** In the Docker setup, ChromaDB stores its
+> vectors in a separate Compose-managed volume (declared as `chromadb-data`),
+> **not** under `./data`. `odysseus-backup` therefore does not capture the Docker
+> ChromaDB store. Back it up separately if you need it. Compose prefixes the
+> volume with the project name, so find the real name first
+> (`docker volume ls | grep chromadb`), then archive it — for example:
+>
+> ```bash
+> docker run --rm -v <project>_chromadb-data:/data -v "$PWD":/backup \
+>   alpine tar czf /backup/chromadb.tar.gz -C /data .
+> ```
+>
+> On native installs ChromaDB lives at `data/chroma/` and is included in the
+> snapshot normally.
@@ -0,0 +1,17 @@
+# Outlook / Office 365 email accounts
+
+Odysseus email accounts currently use IMAP and SMTP with username/password
+authentication. That works for providers that still allow app passwords or
+mailbox passwords for IMAP/SMTP.
+
+Microsoft disables basic authentication for Outlook and Microsoft 365 in most
+modern accounts and tenants. If you try to add an Outlook account with a normal
+password, Microsoft may return errors such as:
+
+- `IMAP: AUTHENTICATE failed`
+- `SMTP: 535 5.7.139 Authentication unsuccessful, basic authentication is disabled`
+
+This is expected. Odysseus does not support Microsoft OAuth or Graph Mail yet,
+so Outlook / Office 365 accounts cannot currently be added through the password
+form. Use another email provider with app-password support, or track the future
+Microsoft Graph OAuth integration.
@@ -25,9 +25,16 @@
    --radius: 8px;
  }
  * { box-sizing: border-box; }
-  html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; }
-  /* Each section is a full-viewport "page" with its content centered, so only
-     one shows at a time and the snap is obvious. */
+  html { scroll-behavior: smooth; scroll-padding-top: 60px; }
+  /* REMOVED: "scroll-snap-type: y proximity"
+     The idea was: >>Each section is a full-viewport "page" with its content centered,
+     so only one shows at a time and the snap is obvious.<<
+
+     PROBLEM: in practice, sections easily grow taller than 100vh.
+     This causes forced jumps mid-read, which is intrusive UX.
+     The landing-page is not a PowerPoint presentation!
+
+     Preserved: the scroll-snap-align declarations below (inert without a
+     scroll-snap-type on the scroll container) to avoid destroying code meta-data*/
  .hero, section {
    scroll-snap-align: start; min-height: 100vh;
    display: flex; flex-direction: column; justify-content: center;
@@ -0,0 +1,107 @@
+# Security CI guide
+
+This project runs a set of automated security checks on pull requests and
+selected branch pushes. This page explains what each one does, whether it can
+block a merge, and the few one-time settings you should turn on to get the full
+benefit.
+
+## What runs, and why
+
+Most checks live in files under `.github/workflows/`. CodeQL is configured
+through GitHub's code scanning default setup, so it appears as a dynamic GitHub
+workflow instead of a checked-in workflow file. They run automatically; you do
+not start them.
+
+| Check | What it protects against | Blocks a merge? |
+|---|---|---|
+| **Secret scan** (gitleaks) | An API key, token, or password being committed by mistake or on purpose | Yes |
+| **Workflow security** (actionlint + zizmor) | A broken or insecure automation file that could leak the repo's access token | Yes |
+| **Dependency review** | A pull request that adds a software library with a known security hole | Yes |
+| **pip-audit** | Known security holes in the Python libraries already used | No (advisory) |
+| **Container scan: hadolint** | Mistakes and insecure patterns in the `Dockerfile` | Yes |
+| **Container scan: Trivy** | Known security holes in the Docker image | No (advisory) |
+| **CodeQL** | Real bugs in the app's own code: injection, auth mistakes, path traversal | No (advisory) |
+
+"Blocks a merge" means a red X appears on the pull request and, once you enable
+the setting below, the **Merge** button is disabled until it is fixed.
+
+"Advisory" means it reports problems into the repository's **Security** tab so
+you can review them on your own schedule, but it never stops a merge. These are
+advisory on purpose: they often flag long-standing issues in other people's
+libraries, not something a given pull request introduced.
+
+## Where results appear
+
+- **Checks tab of a pull request**: the pass/fail of each check. A green tick is
+  good; a red X needs attention.
+- **Security tab of the repository**: detailed findings from the advisory
+  scanners (Trivy and CodeQL). This is your dashboard.
+
+## If a check fails
+
+- **Secret scan failed**: a real credential may have been committed. Treat it as
+  leaked: rotate (regenerate) that key or token immediately, then remove it from
+  the file. Do not just delete the commit; assume it was seen.
+- **Dependency review failed**: the pull request adds a library with a known
+  vulnerability. Ask the contributor to use a patched version, or decline the
+  change.
+- **hadolint / workflow security failed**: the contributor changed the
+  `Dockerfile` or an automation file in a way the linter rejects. Ask them to
+  address the message shown in the failed check.
+
+## One-time settings to turn on
+
+These two settings unlock the full value. You only do them once.
+
+### 1. Require the blocking checks before merging
+
+This makes the **Merge** button refuse to work until the gating checks pass.
+
+1. Go to the repository on GitHub.
+2. Click **Settings** (top right of the repo).
+3. In the left sidebar, click **Branches**.
+4. Under **Branch protection rules**, click **Add branch ruleset** (or **Add
+   rule**), and set the branch name pattern to `dev` (this is the branch all
+   pull requests target; `main` is fast-forwarded at releases).
+5. Enable **Require status checks to pass before merging**.
+6. In the search box that appears, add these checks by name:
+   - `Python syntax (compileall)`
+   - `JS syntax (node --check)`
+   - `gitleaks`
+   - `actionlint`
+   - `zizmor (Actions SAST)`
+   - `hadolint (Dockerfile lint)`
+   - `dependency-review (PR gate)`
+
+   The first two come from the correctness CI (`ci.yml`); the rest are this
+   security suite. Leave pytest, pip-audit, Trivy, and CodeQL unchecked so they
+   stay advisory.
+7. Also enable **Require a pull request before merging** and **Require review
+   from Code Owners** (this uses the `.github/CODEOWNERS` file so every change
+   needs your sign-off).
+8. Click **Create** / **Save changes**.
+
+Note: a check name only appears in the list after it has run at least once, so
+let the workflows run on one pull request first, then add them here.
+
+### 2. Turn on the Security tab features
+
+1. **Settings -> Code security** (or **Code security and analysis**).
+2. Turn on **Dependency graph** (usually on by default for public repos) -- this
+   powers Dependency review and Dependabot.
+3. Turn on **Dependabot alerts** and **Dependabot security updates**.
+4. Under **Code scanning**, use **Set up -> Default** for CodeQL. GitHub then
+   runs CodeQL as a dynamic workflow without the fork-token limitations that
+   affect checked-in advanced workflows.
+
+   Do not also add a checked-in CodeQL workflow while default setup is enabled:
+   GitHub rejects advanced CodeQL uploads when default setup is active. If the
+   project later needs an advanced CodeQL workflow, disable default setup first
+   and keep only one CodeQL publishing path active.
+
+## Keeping it current
+
+`.github/dependabot.yml` opens small weekly pull requests to update Python and
+npm packages, the Docker base image, and the pinned automation actions
+themselves. Review and merge those like any other pull request; they keep the
+project patched without manual tracking.
@@ -0,0 +1,425 @@
+# Odysseus Setup Guide
+
+This page keeps the detailed install, deployment, troubleshooting, and configuration notes out of the front README.
+
+## Quick Start
+
+> **Branch note:** `dev` is the default branch and contains the latest development changes, but it may be unstable. For the more stable curated branch, use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main).
+
+Defaults work out of the box: clone, run, then configure models/search/email
+inside **Settings**. Only edit `.env` for deployment-level overrides like
+`APP_BIND`, `APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password.
+
+On first setup, Odysseus creates an admin account (`admin` unless
+`ODYSSEUS_ADMIN_USER` is set) and prints a temporary password in the terminal.
+For Docker installs, the same line is in `docker compose logs odysseus`.
+Use that for the first login, then change it in **Settings**.
+
+Contributing? See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and
+pull request guidelines.
+
+### Docker (recommended)
+```bash
+git clone https://github.com/pewdiepie-archdaemon/odysseus.git
+cd odysseus
+cp .env.example .env       # optional, but recommended for explicit defaults
+docker compose up -d --build
+```
+To include optional extras in the image (PDF viewer, Office extraction; includes AGPL PyMuPDF), build with `docker compose build --build-arg INSTALL_OPTIONAL=true` before `up`.
+
+Open `http://localhost:7000` when the containers are healthy. Docker Compose
+binds the web UI to `127.0.0.1` by default. If the port is taken, set
+`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
+only when you intentionally want LAN/reverse-proxy access.
+
+> **On Apple Silicon (M-series) Macs:** Docker can't reach the Metal GPU, so
+> Cookbook serves local models on CPU only. For GPU-accelerated model serving,
+> run natively instead — see [Apple Silicon](#apple-silicon) below.
+
+### Native Linux / macOS
+```bash
+git clone https://github.com/pewdiepie-archdaemon/odysseus.git
+cd odysseus
+python3 -m venv venv
+source venv/bin/activate
+pip install -r requirements.txt
+python setup.py
+python -m uvicorn app:app --host 127.0.0.1 --port 7000
+```
+Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
+downloads and serves. The app itself is lightweight; local model serving is the
+heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
+connect to API or remote model servers instead. Use `--host 0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
+
+### Apple Silicon
+Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
+M-series Mac, run Odysseus natively:
+
+```bash
+git clone https://github.com/pewdiepie-archdaemon/odysseus.git
+cd odysseus
+./start-macos.sh
+```
+
+It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
+
+```bash
+ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
+# then open http://<tailscale-ip>:7860
+```
+
+The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
+set there are picked up automatically without a command-line override each run.
+
+Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not
+expose this port directly to the public internet. To build a clickable app wrapper:
+
+```bash
+./build-macos-app.sh
+```
+
+<details>
+<summary>Cookbook, GPU, Ollama, and troubleshooting notes</summary>
+
+**Docker bundled services.** Compose starts Odysseus, ChromaDB, SearXNG, and
+ntfy. Odysseus and the bundled service ports bind to `127.0.0.1` by default, so
+they are reachable from the host but not exposed to your LAN/public internet
+unless you opt in.
+
+**Cookbook storage in Docker.** Downloads live in `./data/huggingface`
+(`~/.cache/huggingface` in the container). Cookbook-installed Python CLIs and
+serve engines live in `./data/local` (`~/.local` in the container), so they
+survive container recreation.
+
+**Remote servers.** In **Cookbook -> Settings -> Servers**, generate the
+Odysseus SSH key and add the public key to the remote server's
+`~/.ssh/authorized_keys`. From the host you can also run:
+
+```bash
+ssh-copy-id -i data/ssh/id_ed25519.pub user@server
+```
+
+**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
+only detect GPUs that Docker exposes to the container — if the host runtime or
+device passthrough is not configured, Cookbook sees the iGPU, another card, or
+CPU instead of your intended GPU.
+
+For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
+optionally install the host runtime or update `.env`.
+
+```bash
+# Read-only diagnostic (default — installs nothing, never edits .env):
+scripts/check-docker-gpu.sh
+
+# Print OS-specific install commands without running them:
+scripts/check-docker-gpu.sh --print-install-commands
+
+# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
+scripts/check-docker-gpu.sh --install-nvidia-toolkit
+
+# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
+scripts/check-docker-gpu.sh --enable-nvidia-overlay
+
+# Full assisted setup — install toolkit, then enable overlay if passthrough works:
+scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
+```
+
+Safety notes:
+- The app never installs host GPU runtime automatically.
+- The app never edits `.env` automatically.
+- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
+  and only after GPU passthrough succeeds. `--yes` skips prompts but does not
+  bypass the passthrough gate.
+- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
+  Git and the Docker build context.
+
+To enable manually without the script, add this to `.env`:
+
+```bash
+COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
+```
+
+**AMD / ROCm.** AMD setup is a read-only diagnostic plus a manual `.env` edit. Run:
+
+```bash
+scripts/check-docker-amd-gpu.sh
+```
+
+Then add the reported values to `.env`, setting `RENDER_GID` to your host's
+numeric render group ID:
+
+```bash
+COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
+RENDER_GID=989
+```
+
+For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`.
+
+**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools
+often accept only a single Compose file and do not reliably honor `COMPOSE_FILE`
+or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE`
+overlay workflow above. For stack UIs, point the stack at one of the standalone
+files instead, which bundle the base stack plus the GPU settings:
+
+- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit
+  on the host.
+- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the
+  `video`/`render` group membership, and `RENDER_GID` when needed.
+
+The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the
+source of truth; the standalone files mirror them for single-file deployments.
+
+Verify after enabling either overlay:
+
+```bash
+docker compose exec odysseus nvidia-smi -L   # NVIDIA
+docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*'  # AMD
+```
+
+> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
+> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
+> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
+> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
+> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
+> not a Docker passthrough failure. Reinstall the serve engine via
+> **Cookbook → Dependencies** to get a CUDA-enabled build.
+>
+> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
+> the container confirms device passthrough, not ROCm userspace or a
+> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
+> inside the slim Odysseus image.
+
+**Ollama with Docker.** If Ollama runs on the host, add this endpoint in
+Settings:
+
+```text
+http://host.docker.internal:11434/v1
+```
+
+Ollama must listen outside its own loopback interface:
+
+```bash
+OLLAMA_HOST=0.0.0.0:11434 ollama serve
+```
+
+This connects Odysseus in Docker to an Ollama server that is already running on
+your host machine; it does not start Ollama inside the container.
+`host.docker.internal` is Docker's hostname for the host machine from inside the
+container. Cookbook **Serve** is a separate workflow for serving downloaded
+models through Odysseus/llama.cpp, so Windows users with an existing Ollama
+install usually only need to add the endpoint in Settings.
+
+**Useful checks.**
+
+```bash
+docker compose ps
+docker compose logs --tail=120 odysseus
+docker compose logs odysseus | grep -E 'ChromaDB|MemoryVectorStore|DEGRADED'
+```
+
+**macOS details.** `start-macos.sh` installs Homebrew deps, creates the venv,
+runs setup, and starts uvicorn on port `7860` because AirPlay often holds
+`7000`. It uses llama.cpp/Ollama for Metal. vLLM/SGLang are CUDA/ROCm-only and
+do not run on macOS. MLX-only models are not served by Odysseus.
+
+</details>
+
+### Native Windows
+
+**One-command launcher** (creates the venv, installs deps, runs setup, starts the
+server; safe to re-run):
+
+```powershell
+git clone https://github.com/pewdiepie-archdaemon/odysseus.git
+cd odysseus
+powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
+```
+
+Or do it by hand:
+
+```powershell
+git clone https://github.com/pewdiepie-archdaemon/odysseus.git
+cd odysseus
+py -3.11 -m venv venv
+venv\Scripts\Activate.ps1
+pip install -r requirements.txt
+python setup.py
+python -m uvicorn app:app --host 127.0.0.1 --port 7000
+```
+
+If `python` points at an older interpreter, use `py -3.12` (or another installed
+3.11+ version) for the venv step.
+
+**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
+email, calendar, deep research) runs fully native. For full **Cookbook** background
+model downloads and the agent shell tool, also install
+[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`).
+Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows,
+[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at
+`http://localhost:11434/v1` in Settings.
+
+Open `http://localhost:7000`, log in with the generated admin password,
+and configure everything else inside **Settings**.
+
+## Troubleshooting & Advanced Setup
+
+### `chromadb-client` conflicts with embedded ChromaDB
+If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
+
+**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
+```bash
+./venv/bin/pip uninstall chromadb-client -y
+./venv/bin/pip install --force-reinstall chromadb
+```
+
+### HTTPS + LAN/Tailscale exposure
+To expose Odysseus on a local network or Tailscale with HTTPS:
+1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
+2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert):
+   ```bash
+   mkcert -install
+   mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
+   ```
+3. Run `uvicorn` with the generated certs:
+   ```bash
+   python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
+   ```
+4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
+
+### Optional Dependencies
+`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
+
+| Package | Feature unlocked |
+|---------|-----------------|
+| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
+| `ddgs` | DuckDuckGo as a search provider option. |
+| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
+| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
+
+### Faster, reproducible installs with uv (optional)
+[uv](https://docs.astral.sh/uv/) works as a drop-in replacement for the
+venv + pip steps in the native install guides. No project changes are needed, and you get faster installs plus the option of a lockfile for reproducible environments. After [installing `uv`](https://docs.astral.sh/uv/getting-started/installation/), use:
+
+```bash
+uv venv venv --python 3.13
+uv pip install -r requirements.txt
+# then continue as usual: python setup.py, uvicorn, ...
+```
+
+`requirements.txt` is intentionally unpinned, so two installs at different times can produce different package versions. If you want a reproducible environment (e.g. across your own machines, or to roll back after a bad upgrade), snapshot and restore exact versions with:
+
+```bash
+uv pip compile requirements.txt -o requirements.lock   # snapshot current resolution
+uv pip sync requirements.lock                          # reproduce it exactly later
+```
+
+`requirements.lock` is gitignored and platform-specific (compile it on the OS you deploy to). Regenerate it deliberately when you want to take upgrades. The plain `uv pip install -r requirements.txt` keeps following the unpinned requirements like pip does.
+
+### Outlook / Office 365 email
+Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook
+and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox
+passwords will fail. See [docs/email-outlook.md](docs/email-outlook.md) for the
+current limitation and the planned integration direction.
+
+## Security Notes
+Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.
+
+- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
+- Keep `LOCALHOST_BYPASS=false` outside local development.
+- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
+- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
+- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
+- Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
+- Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
+- Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
+- If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
+- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
+- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
+- Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.
+
+### Private or proxied deployments
+Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:
+
+1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
+2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
+3. Put the authenticated Odysseus web/API entrypoint behind that layer.
+4. Keep raw service and model ports internal-only.
+
+Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
+`ALLOWED_ORIGINS` lists exact permitted origins for cross-origin browser/API clients; ordinary same-origin reverse-proxy access usually does not need a special CORS entry.
+
+Common internal-only ports from the default docs/compose setup:
+
+| Port | Service |
+|---|---|
+| `7000` | Odysseus raw app port |
+| `8080` | SearXNG |
+| `8091` | ntfy |
+| `8100` | ChromaDB host port for manual/compose access |
+| `11434` | Ollama |
+| `8000-8020` | Common local model/provider APIs |
+
+## Configuration
+Most setup is done inside the app with `/setup` or **Settings**. Use `.env`
+for deployment-level defaults and secrets you want present before first boot.
+Key settings:
+
+| Variable | Default | Description |
+|---|---|---|
+| `LLM_HOST` | `localhost` | Your LLM server (e.g. `llm-host.local:8000`) |
+| `LLM_HOSTS` | -- | Comma-separated list for model discovery |
+| `OPENAI_API_KEY` | -- | Optional OpenAI key. Prefer adding providers in the app unless pre-seeding. |
+| `SEARXNG_INSTANCE` | `http://localhost:8080` | SearXNG URL. Docker overrides this to `http://searxng:8080`. |
+| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
+| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
+| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
+| `APP_DATA_DIR` | `./data` | Docker Compose host directory for application data volumes. |
+| `APP_LOGS_DIR` | `./logs` | Docker Compose host directory for application logs. |
+| `AUTH_ENABLED` | `true` | Enable/disable login |
+| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
+| `ALLOWED_ORIGINS` | `http://localhost,http://127.0.0.1` | Comma-separated exact permitted origins for cross-origin browser/API clients. |
+| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
+| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
+| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
+| `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |
+| `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint |
+| `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. |
+| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). |
+| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). |
+| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). |
+| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). |
+| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). |
+| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). |
+| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). |
+
+All upload-limit variables are optional. When set, each must be a positive integer; an invalid value fails fast at startup.
+
+### Built-in MCP servers (optional setup)
+
+Odysseus auto-registers a few built-in MCP servers at startup. The npx-based ones (currently the browser server, `@playwright/mcp`) only start when their npm package is already in the local npx cache. If a package isn't cached, that server is skipped with a startup log message explaining what to do, so a fresh install does not block on a multi-minute npm download or hang if Playwright system deps are missing.
+
+To enable the browser MCP (page navigation, screenshots, vision), run once:
+
+```bash
+npx -y @playwright/mcp@latest --version
+```
+
+That installs `@playwright/mcp` plus Playwright (~300MB total). Restart Odysseus and the server will register at startup.
+
+## Architecture
+```
+app.py                   # FastAPI entry point
+core/      auth, database, middleware, constants
+src/       llm_core, agent_loop, agent_tools, chat_processor, search/
+routes/    chat, session, document, memory, model … endpoints
+services/  docs, memory, search, hwfit (Cookbook) …
+static/    index.html + app.js + style.css + js/ (modular front-end)
+docs/      landing page (index.html) + preview clips
+```
+
+## Data
+All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
+`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
+
+To back up or restore everything in `data/`, see the
+[Backup & Restore guide](docs/backup-restore.md).
@@ -1,6 +1,6 @@
 ---
 name: odysseus
-description: Use when the user asks Claude Code to read or write Odysseus data (todos, email, calendar, memory, documents) through the scoped Claude Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
+description: Use when the user asks Claude Code to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Claude Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
 ---

 # Odysseus
@@ -102,9 +102,53 @@ python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py POST /api/codex/memory

 ## Email draft + send

+- Prefer `POST /api/codex/emails/draft-document` for agent-written email replies. It creates an editable Odysseus Document with `language: "email"` and does not touch IMAP/send.
 - `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
 - `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.

+## Cookbook serve (debug a failing model launch)
+
+The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.).
+
+- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/cached?host=<NAME>` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt + neofetch banner overwrites the pane. Default tail=400. Requires `cookbook:read`.
+- `POST /api/codex/cookbook/serve` — launch a serve task. Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session for that task. Requires `cookbook:launch`.
+
+```bash
+# Survey what's running
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook tasks
+
+# Tail the failing one (sessionId from `cookbook tasks`)
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400
+
+# Stop the previous attempt before you try a new flag set
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345
+
+# Relaunch with new flags. cmd MUST begin with one of the allowlisted binaries.
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook serve \
+  /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \
+  "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \
+  pewds@192.168.1.12
+```
+
+**Debug loop pattern:** when a serve is failing, the productive sequence is
+
+1. `cookbook tasks` → find the failing sessionId.
+2. `cookbook output SID 600` → read the last 600 lines, find the actual root-cause line (often above the visible tail because tmux scrollback rolled — request a larger `tail` if the error references "above").
+3. `cookbook stop SID` — kill the previous attempt before relaunching; two serves on the same `--port` collide.
+4. `cookbook serve repo "new cmd"` — try the next variation. Wait ~20s, then `cookbook output` on the new sessionId.
+
+**Hard limits this surface enforces:**
+- `cookbook serve` cmd allowlist + shell-metacharacter rejection — you cannot run arbitrary shell, only model-server binaries.
+- `cookbook stop` only targets task sessionIds matching `[a-zA-Z0-9_-]+`.
+- The agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching, and check `cookbook tasks` for collisions on the same `--port` before launching.
+
 ## Forbidden Bypass Pattern

 If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Claude Agent tool toggle instead.
@@ -17,6 +17,20 @@ def _usage() -> int:
    print("  odysseus_api.py todos add TITLE", file=sys.stderr)
    print("  odysseus_api.py emails list [limit]", file=sys.stderr)
    print("  odysseus_api.py emails read UID", file=sys.stderr)
+    print("  odysseus_api.py emails draft-doc JSON_PAYLOAD", file=sys.stderr)
+    print("  odysseus_api.py documents list [limit]", file=sys.stderr)
+    print("  odysseus_api.py documents read DOC_ID", file=sys.stderr)
+    print("  odysseus_api.py documents create JSON_PAYLOAD", file=sys.stderr)
+    print("  odysseus_api.py documents delete DOC_ID", file=sys.stderr)
+    print("  odysseus_api.py cookbook tasks", file=sys.stderr)
+    print("  odysseus_api.py cookbook servers", file=sys.stderr)
+    print("  odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook presets", file=sys.stderr)
+    print("  odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr)
+    print("  odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook preset NAME", file=sys.stderr)
+    print("  odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr)
+    print("  odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr)
    print("  odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr)
    return 2

@@ -70,6 +84,88 @@ def main() -> int:
            method = "GET"
            path = f"/api/codex/emails/{sys.argv[3]}"
            body = None
+        elif action in ("draft-doc", "draft_document") and len(sys.argv) >= 4:
+            method = "POST"
+            path = "/api/codex/emails/draft-document"
+            body = " ".join(sys.argv[3:])
+        else:
+            return _usage()
+    elif command in ("documents", "docs"):
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "list":
+            method = "GET"
+            limit = sys.argv[3] if len(sys.argv) >= 4 else "50"
+            path = f"/api/codex/documents?limit={limit}"
+            body = None
+        elif action == "read" and len(sys.argv) >= 4:
+            method = "GET"
+            path = f"/api/codex/documents/{sys.argv[3]}"
+            body = None
+        elif action == "create" and len(sys.argv) >= 4:
+            method = "POST"
+            path = "/api/codex/documents"
+            body = " ".join(sys.argv[3:])
+        elif action == "delete" and len(sys.argv) >= 4:
+            method = "DELETE"
+            path = f"/api/codex/documents/{sys.argv[3]}"
+            body = None
+        else:
+            return _usage()
+    elif command == "cookbook":
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "tasks":
+            method = "GET"
+            path = "/api/codex/cookbook/tasks"
+            body = None
+        elif action == "servers":
+            method = "GET"
+            path = "/api/codex/cookbook/servers"
+            body = None
+        elif action == "output" and len(sys.argv) >= 4:
+            method = "GET"
+            sid = sys.argv[3]
+            tail = sys.argv[4] if len(sys.argv) >= 5 else "400"
+            path = f"/api/codex/cookbook/output/{sid}?tail={tail}"
+            body = None
+        elif action == "cached":
+            method = "GET"
+            if len(sys.argv) >= 4:
+                from urllib.parse import quote
+                path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}"
+            else:
+                path = "/api/codex/cookbook/cached"
+            body = None
+        elif action == "presets":
+            method = "GET"
+            path = "/api/codex/cookbook/presets"
+            body = None
+        elif action == "preset" and len(sys.argv) >= 4:
+            from urllib.parse import quote
+            method = "POST"
+            path = f"/api/codex/cookbook/preset/{quote(sys.argv[3])}"
+            body = None
+        elif action == "adopt" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/adopt"
+            payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]}
+            if len(sys.argv) >= 6: payload["host"] = sys.argv[5]
+            if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6])
+            body = json.dumps(payload)
+        elif action == "serve" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/serve"
+            payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]}
+            if len(sys.argv) >= 6:
+                payload["remote_host"] = sys.argv[5]
+            body = json.dumps(payload)
+        elif action == "stop" and len(sys.argv) >= 4:
+            method = "POST"
+            path = f"/api/codex/cookbook/stop/{sys.argv[3]}"
+            body = None
        else:
            return _usage()
    else:
@@ -17,6 +17,20 @@ def _usage() -> int:
    print("  odysseus_api.py todos add TITLE", file=sys.stderr)
    print("  odysseus_api.py emails list [limit]", file=sys.stderr)
    print("  odysseus_api.py emails read UID", file=sys.stderr)
+    print("  odysseus_api.py emails draft-doc JSON_PAYLOAD", file=sys.stderr)
+    print("  odysseus_api.py documents list [limit]", file=sys.stderr)
+    print("  odysseus_api.py documents read DOC_ID", file=sys.stderr)
+    print("  odysseus_api.py documents create JSON_PAYLOAD", file=sys.stderr)
+    print("  odysseus_api.py documents delete DOC_ID", file=sys.stderr)
+    print("  odysseus_api.py cookbook tasks", file=sys.stderr)
+    print("  odysseus_api.py cookbook servers", file=sys.stderr)
+    print("  odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook presets", file=sys.stderr)
+    print("  odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr)
+    print("  odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook preset NAME", file=sys.stderr)
+    print("  odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr)
+    print("  odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr)
    print("  odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr)
    return 2

@@ -70,6 +84,88 @@ def main() -> int:
            method = "GET"
            path = f"/api/codex/emails/{sys.argv[3]}"
            body = None
+        elif action in ("draft-doc", "draft_document") and len(sys.argv) >= 4:
+            method = "POST"
+            path = "/api/codex/emails/draft-document"
+            body = " ".join(sys.argv[3:])
+        else:
+            return _usage()
+    elif command in ("documents", "docs"):
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "list":
+            method = "GET"
+            limit = sys.argv[3] if len(sys.argv) >= 4 else "50"
+            path = f"/api/codex/documents?limit={limit}"
+            body = None
+        elif action == "read" and len(sys.argv) >= 4:
+            method = "GET"
+            path = f"/api/codex/documents/{sys.argv[3]}"
+            body = None
+        elif action == "create" and len(sys.argv) >= 4:
+            method = "POST"
+            path = "/api/codex/documents"
+            body = " ".join(sys.argv[3:])
+        elif action == "delete" and len(sys.argv) >= 4:
+            method = "DELETE"
+            path = f"/api/codex/documents/{sys.argv[3]}"
+            body = None
+        else:
+            return _usage()
+    elif command == "cookbook":
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "tasks":
+            method = "GET"
+            path = "/api/codex/cookbook/tasks"
+            body = None
+        elif action == "servers":
+            method = "GET"
+            path = "/api/codex/cookbook/servers"
+            body = None
+        elif action == "output" and len(sys.argv) >= 4:
+            method = "GET"
+            sid = sys.argv[3]
+            tail = sys.argv[4] if len(sys.argv) >= 5 else "400"
+            path = f"/api/codex/cookbook/output/{sid}?tail={tail}"
+            body = None
+        elif action == "cached":
+            method = "GET"
+            if len(sys.argv) >= 4:
+                from urllib.parse import quote
+                path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}"
+            else:
+                path = "/api/codex/cookbook/cached"
+            body = None
+        elif action == "presets":
+            method = "GET"
+            path = "/api/codex/cookbook/presets"
+            body = None
+        elif action == "preset" and len(sys.argv) >= 4:
+            from urllib.parse import quote
+            method = "POST"
+            path = f"/api/codex/cookbook/preset/{quote(sys.argv[3])}"
+            body = None
+        elif action == "adopt" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/adopt"
+            payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]}
+            if len(sys.argv) >= 6: payload["host"] = sys.argv[5]
+            if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6])
+            body = json.dumps(payload)
+        elif action == "serve" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/serve"
+            payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]}
+            if len(sys.argv) >= 6:
+                payload["remote_host"] = sys.argv[5]
+            body = json.dumps(payload)
+        elif action == "stop" and len(sys.argv) >= 4:
+            method = "POST"
+            path = f"/api/codex/cookbook/stop/{sys.argv[3]}"
+            body = None
        else:
            return _usage()
    else:
@@ -1,6 +1,6 @@
 ---
 name: odysseus
-description: Use when the user asks Codex to read or write Odysseus data from a terminal Codex session through the scoped Codex Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
+description: Use when the user asks Codex to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Codex Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
 ---

 # Odysseus
@@ -102,9 +102,41 @@ python3 integrations/codex/scripts/odysseus_api.py POST /api/codex/memory '{"tex

 ## Email draft + send

+- Prefer `POST /api/codex/emails/draft-document` for Codex-written email replies. It creates an editable Odysseus Document with `language: "email"` and does not touch IMAP/send.
 - `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
 - `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.

+## Cookbook serve (debug a failing model launch)
+
+The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.).
+
+- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/cached?host=<NAME>` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt and neofetch banner have overwritten the pane. Default tail=400. Requires `cookbook:read`.
+- `POST /api/codex/cookbook/serve` — launch a serve task. Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session. Requires `cookbook:launch`.
+
+```bash
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook tasks
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook serve \
+  /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \
+  "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \
+  pewds@192.168.1.12
+```
+
+**Debug loop pattern:** `tasks` → `output SID 600` (find the root cause; request a larger `tail` if the log refers to errors "above" the lines you received) → `stop SID` → `serve REPO_ID "new cmd"` → wait ~20s → `output` on the new sessionId.
+
+**Hard limits this surface enforces:**
+- `cookbook serve` cmd allowlist + shell-metacharacter rejection.
+- `cookbook stop` requires sessionIds matching `[a-zA-Z0-9_-]+`.
+- Agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching.
+
 ## Forbidden Bypass Pattern

 If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Codex Agent tool toggle instead.
@@ -30,14 +30,26 @@ function Fail($msg) {
    exit 1
 }

+function Test-WindowsBashStub($path) {
+    # Returns $true when $path (compared in lower case) contains one of the
+    # bash.exe locations listed below, and $false otherwise - including when
+    # $path is empty or $null.
+    # NOTE(review): these look like the Windows-provided bash launchers rather
+    # than a Git for Windows bash - confirm against the caller's intent.
+    if (-not $path) {
+        return $false
+    }
+    $candidate = $path.ToLowerInvariant()
+    $stubFragments = @("system32\bash.exe", "sysnative\bash.exe", "windowsapps\bash.exe")
+    foreach ($fragment in $stubFragments) {
+        if ($candidate.Contains($fragment)) {
+            return $true
+        }
+    }
+    return $false
+}
+
 function Find-GitBash {
    $cmd = Get-Command bash -ErrorAction SilentlyContinue
-    if ($cmd) { return $cmd.Source }
+    if ($cmd -and -not (Test-WindowsBashStub $cmd.Source)) { return $cmd.Source }

    $roots = @()
    foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
        $base = [Environment]::GetEnvironmentVariable($name)
-        if ($base) { $roots += (Join-Path $base "Git") }
+        if ($base) {
+            $roots += (Join-Path $base "Git")
+            if ($name -eq "LocalAppData") { $roots += (Join-Path $base "Programs\Git") }
+        }
    }
    $roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")

@@ -129,7 +141,20 @@ if (-not (Find-GitBash)) {
    Write-Host "      https://git-scm.com/download/win" -ForegroundColor Yellow
 }

-# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
+# 6. Point CUDA_PATH at a real CUDA toolkit so GPU llama-cpp-python can import.
+$cudaBase = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
+if (Test-Path $cudaBase) {
+    $cudaBest = Get-ChildItem $cudaBase -Directory -ErrorAction SilentlyContinue |
+        Where-Object { Test-Path (Join-Path $_.FullName "bin") } |
+        Sort-Object { try { [version]($_.Name -replace "^v", "") } catch { [version]"0.0" } } -Descending |
+        Select-Object -First 1
+    if ($cudaBest) {
+        $env:CUDA_PATH = $cudaBest.FullName
+        Write-Host ("Using CUDA_PATH = " + $cudaBest.FullName) -ForegroundColor Cyan
+    }
+}
+
+# 7. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
 Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port)
 Write-Host "Press Ctrl+C to stop."
 Write-Host ""
@@ -1,22 +0,0 @@
-"""
-_common.py
-
-Shared constants and helpers for built-in MCP servers.
-"""
-
-MAX_OUTPUT_CHARS = 10_000
-MAX_READ_CHARS = 20_000
-SHELL_TIMEOUT = 60
-PYTHON_TIMEOUT = 30
-SEARCH_TIMEOUT = 30
-
-
-def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str:
-    """Truncate text to *limit* characters with a suffix note."""
-    if not isinstance(text, str):
-        # Tool output is occasionally None or a non-string; len(None) would
-        # raise. Coerce so this shared helper never crashes a tool response.
-        text = "" if text is None else str(text)
-    if len(text) > limit:
-        return text[:limit] + f"\n... (truncated, {len(text)} chars total)"
-    return text
@@ -16,6 +16,8 @@ from mcp.types import Tool, TextContent

 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

+from src.constants import GENERATED_IMAGES_DIR
+
 server = Server("image_gen")


@@ -115,14 +117,18 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:

            img = images[0]
            image_url = None
+            # Prefix the instance's public base URL (existing app_public_url setting) so the
+            # link is fully-qualified and clickable when the model echoes it. Empty = relative
+            # same-origin path (unchanged default).
+            _pub_base = (get_setting("app_public_url", "") or "").rstrip("/")

            if img.get("b64_json"):
-                img_dir = Path("data/generated_images")
+                img_dir = Path(GENERATED_IMAGES_DIR)
                img_dir.mkdir(parents=True, exist_ok=True)
                filename = f"{uuid.uuid4().hex[:12]}.png"
                img_path = img_dir / filename
                img_path.write_bytes(base64.b64decode(img["b64_json"]))
-                image_url = f"/api/generated-image/{filename}"
+                image_url = f"{_pub_base}/api/generated-image/{filename}"

                # Save to gallery
                try:
@@ -146,7 +152,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
            else:
                return [TextContent(type="text", text="Error: Unexpected image API response format")]

-            result = f"Generated image for: {prompt[:100]}\nimage_url: {image_url}\nmodel: {model_id}\nsize: {size}"
+            # "Direct link:" rather than an "image_url:" label — small models copied the
+            # label token ("image_url") into the link href, producing a broken link.
+            result = (
+                f"Generated image for: {prompt[:100]}\n"
+                f"Direct link: {image_url}\n"
+                f"model: {model_id}\nsize: {size}"
+            )
            return [TextContent(type="text", text=result)]

    except httpx.TimeoutException:
@@ -93,16 +93,15 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
            if category_filter:
                msg += f" in category '{category_filter}'"
            return [TextContent(type="text", text=msg + ".")]
+
        lines = [f"Found {len(memories)} memory entries:\n"]
-        for m in memories[:100]:
+        for m in memories:
            cat = m.get("category", "fact")
            mid = m.get("id", "?")[:8]
            text = m.get("text", "")
            if len(text) > 150:
                text = text[:150] + "..."
            lines.append(f"- [{cat}] `{mid}` — {text}")
-        if len(memories) > 100:
-            lines.append(f"... and {len(memories) - 100} more")
        return [TextContent(type="text", text="\n".join(lines))]

    elif action == "add":
@@ -1,20 +1,20 @@
 {
-  "name": "odysseus-ui",
+  "name": "odysseus",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
-        "@anthropic-ai/sdk": "^0.98.0"
+        "@anthropic-ai/sdk": "^0.104.1"
      },
      "devDependencies": {
-        "@antithesishq/bombadil": "^0.3.2"
+        "@antithesishq/bombadil": "^0.5.0"
      }
    },
    "node_modules/@anthropic-ai/sdk": {
-      "version": "0.98.0",
-      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.98.0.tgz",
-      "integrity": "sha512-N7aXtCvC5g6T1Y4V29lJjceu/zTkVkIZF0jdBvagr0TRFHuKeImffalGWEfqZKrvjH+IQbzJWw6TmSmUzrlMgg==",
+      "version": "0.104.1",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
+      "integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
      "license": "MIT",
      "dependencies": {
        "json-schema-to-ts": "^3.1.1",
@@ -33,11 +33,14 @@
      }
    },
    "node_modules/@antithesishq/bombadil": {
-      "version": "0.3.2",
-      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.3.2.tgz",
-      "integrity": "sha512-ATy1w9ZY5gbny1H8DFc7rxZitT7DLLLFDiGcRZe+8TQiUrV5tLO+IJGOVNNLp3RpCqjZqSsxGiKoQsx31ipV1g==",
+      "version": "0.5.0",
+      "resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
+      "integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
      "dev": true,
-      "license": "MIT"
+      "license": "MIT",
+      "bin": {
+        "bombadil": "bin/bombadil.js"
+      }
    },
    "node_modules/@babel/runtime": {
      "version": "7.29.7",
@@ -4,9 +4,9 @@
    "url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
  },
  "devDependencies": {
-    "@antithesishq/bombadil": "^0.3.2"
+    "@antithesishq/bombadil": "^0.5.0"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "^0.98.0"
+    "@anthropic-ai/sdk": "^0.104.1"
  }
 }
@@ -1,3 +1,22 @@
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 asyncio_mode = "auto"
+# Test-taxonomy markers added at collection time by tests/conftest.py. The
+# stable area_* markers are declared here; the dynamic sub_<filename-token>
+# markers are registered before collection by pytest_configure in
+# tests/conftest.py, so unknown-mark warnings still flag genuine typos outside
+# the taxonomy. See tests/_taxonomy.py and tests/README.md.
+markers = [
+    "area_security: tests covering auth, owner-scope, SSRF, XSS, confinement, redaction",
+    "area_routes: tests covering HTTP route / API behavior",
+    "area_services: tests covering service-layer behavior (llm, cookbook, email, calendar, ...)",
+    "area_cli: tests covering CLI / script behavior",
+    "area_js: JavaScript / Node-backed tests",
+    "area_helpers: self-tests for the shared test helpers in tests/helpers/",
+    "area_unit: pure parser / utility tests that do not clearly belong elsewhere",
+    "area_uncategorized: tests not yet matched by the taxonomy (fallback)",
+    # Fast-lane marker (issue #3443). Opt-in and orthogonal to the area_*/sub_*
+    # taxonomy. The fast lane runs `not slow`; mark a test slow only with
+    # duration evidence (see tests/run_focus.py --durations and tests/README.md).
+    "slow: opt-in marker for known-slow tests; excluded by the fast lane (not slow)",
+]
@@ -15,7 +15,7 @@ faster-whisper
 # DuckDuckGo as a search provider option.
 # Install if you want DDG in the search-provider dropdown.
 # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
-duckduckgo-search
+ddgs

 # PDF form-filling feature (fillable AcroForm detection, field extraction,
 # value/annotation/signature stamping, page rendering for the form overlay).
@@ -33,4 +33,4 @@ PyMuPDF
 # magika (onnxruntime), already a core dep via fastembed. We avoid the
 # [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
 # the dependency-age discussion in issue #485.
-markitdown[docx,pptx,xlsx,xls]==0.1.5
+markitdown[docx,pptx,xlsx,xls]==0.1.6
@@ -3,8 +3,8 @@ uvicorn
 python-multipart
 python-dotenv
 httpx
-pydantic>=2.0
-pydantic-settings>=2.0
+pydantic>=2.13.4
+pydantic-settings>=2.14.1
 SQLAlchemy
 pypdf
 beautifulsoup4
@@ -43,3 +43,7 @@ qrcode[pil]
 croniter
 pytest
 pytest-asyncio
+# starlette.testclient prefers httpx2 since Starlette 1.2.0 and warns on every
+# TestClient import when only classic httpx is present. Runtime code keeps
+# using `httpx` above; this is test-client only.
+httpx2
@@ -0,0 +1,31 @@
+import re
+
+from fastapi import HTTPException
+
+
+_REMOTE_HOST_RE = re.compile(
+    r"^(?:[A-Za-z0-9][A-Za-z0-9._-]*@)?[A-Za-z0-9][A-Za-z0-9._-]*$"
+)
+_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
+
+
+def validate_remote_host(v: str | None) -> str | None:
+    if v is None or v == "":
+        return None
+    if not _REMOTE_HOST_RE.match(v):
+        raise HTTPException(
+            400,
+            "Invalid remote_host — must be host or user@host, no SSH option syntax",
+        )
+    return v
+
+
+def validate_ssh_port(v: str | None) -> str | None:
+    if v is None or v == "":
+        return None
+    if not _SSH_PORT_RE.fullmatch(str(v)):
+        raise HTTPException(400, "Invalid ssh_port")
+    port = int(v)
+    if port < 1 or port > 65535:
+        raise HTTPException(400, "Invalid ssh_port")
+    return str(port)
@@ -31,7 +31,7 @@ from core.database import (
    CalendarEvent,
    CalendarCal,
 )
-from src.constants import DATA_DIR
+from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR

 logger = logging.getLogger(__name__)

@@ -107,7 +107,7 @@ def setup_admin_wipe_routes(session_manager):
                # Skills live as SKILL.md files under data/skills/. Drop
                # the entire directory; the SkillsManager re-creates the
                # tree on next write.
-                skills_dir = os.path.join(DATA_DIR, "skills")
+                skills_dir = SKILLS_DIR
                count = 0
                if os.path.isdir(skills_dir):
                    # Count SKILL.md files for the response — quick walk.
@@ -115,7 +115,7 @@ def setup_admin_wipe_routes(session_manager):
                        count += sum(1 for f in files if f == "SKILL.md")
                    _rmtree_quiet(skills_dir)
                # Legacy fallback file
-                legacy = os.path.join(DATA_DIR, "skills.json")
+                legacy = SKILLS_FILE
                if os.path.exists(legacy):
                    try:
                        os.remove(legacy)
@@ -151,8 +151,8 @@ def setup_admin_wipe_routes(session_manager):
                db.query(GalleryAlbum).delete()
                db.commit()
                # Also drop the upload dir so disk doesn't keep orphans.
-                _rmtree_quiet(os.path.join(DATA_DIR, "gallery"))
-                _rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads"))
+                _rmtree_quiet(GALLERY_DIR)
+                _rmtree_quiet(GALLERY_UPLOADS_DIR)
                return {"status": "deleted", "kind": kind, "count": count}

            if kind == "calendar":
@@ -25,10 +25,13 @@ ALLOWED_SCOPES = {
    "calendar:write",
    "memory:read",
    "memory:write",
+    "cookbook:read",
+    "cookbook:launch",
 }
 TOKEN_PROFILES = {
    "chat": ["chat"],
    "codex_todos": ["todos:read", "todos:write"],
+    "codex_documents": ["documents:read", "documents:write"],
    "codex_email_drafts": ["email:read", "email:draft", "documents:read", "documents:write"],
 }

@@ -65,6 +68,7 @@ def _normalize_scopes(scopes: str | list[str] | None = None, profile: str | None
    ensure_before("calendar:write", "calendar:read")
    ensure_before("memory:write", "memory:read")
    ensure_before("email:draft", "email:read")
+    ensure_before("cookbook:launch", "cookbook:read")

    return normalized or [DEFAULT_SCOPES]

@@ -151,26 +155,37 @@ def setup_api_token_routes() -> APIRouter:
    @router.patch("/tokens/{token_id}")
    async def update_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        try:
            payload = await request.json()
        except Exception:
            payload = {}
-        scope_list = _normalize_scopes(payload.get("scopes"))
-        scopes_value = ",".join(scope_list)
        with get_db_session() as db:
            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
            if isinstance(payload.get("name"), str) and payload["name"].strip():
                token.name = payload["name"].strip()[:MAX_NAME_LEN]
-            token.scopes = scopes_value
+            # Only touch scopes when the caller actually sent them. A partial
+            # update such as a rename ({"name": ...} with no "scopes" key) must
+            # not silently reset the token to the default scope — that dropped
+            # every previously granted scope.
+            if "scopes" in payload:
+                token.scopes = ",".join(_normalize_scopes(payload.get("scopes")))
            db.add(token)
+            current_scopes = [
+                s.strip()
+                for s in (getattr(token, "scopes", "") or DEFAULT_SCOPES).split(",")
+                if s.strip()
+            ]
            response = {
                "id": token_id,
                "name": getattr(token, "name", ""),
                "owner": getattr(token, "owner", None),
                "token_prefix": getattr(token, "token_prefix", ""),
-                "scopes": scope_list,
+                "scopes": current_scopes,
            }
        _invalidate_cache(request)
        return response
@@ -178,10 +193,14 @@ def setup_api_token_routes() -> APIRouter:
    @router.delete("/tokens/{token_id}")
    def delete_token(request: Request, token_id: str):
        require_admin(request)
+        current_user = get_current_user(request)
        with get_db_session() as db:
-            deleted = db.query(ApiToken).filter(ApiToken.id == token_id).delete()
-            if not deleted:
+            token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
+            if not token:
                raise HTTPException(404, "Token not found")
+            if current_user and token.owner != current_user:
+                raise HTTPException(403, "Not your token")
+            db.delete(token)
        _invalidate_cache(request)
        return {"status": "deleted"}

@@ -7,7 +7,13 @@ import asyncio
 import logging
 import os

-from core.auth import AuthManager
+import json
+import re
+from pathlib import Path
+
+from core.atomic_io import atomic_write_json, atomic_write_text
+from core.auth import AuthManager, SetAdminResult
+from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
 from src.rate_limiter import RateLimiter
 from src.settings_scrub import scrub_settings
 from src.settings import (
@@ -67,6 +73,11 @@ class DeleteUserRequest(BaseModel):
 class RenameUserRequest(BaseModel):
    username: str

+
+class SetAdminRequest(BaseModel):
+    # Request body with a single required boolean, the admin flag.
+    # NOTE(review): presumably consumed by an admin-toggle route; that route
+    # is not visible in this hunk - confirm against the handler.
+    is_admin: bool
+
+
 class SetOpenRegistrationRequest(BaseModel):
    enabled: bool

@@ -131,10 +142,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                return {"ok": False, "requires_totp": True, "username": username}
            if not auth_manager.totp_verify(username, body.totp_code):
                raise HTTPException(401, "Invalid 2FA code")
-        # All checks passed — create session
-        token = await asyncio.to_thread(auth_manager.create_session, username, body.password)
-        if not token:
-            raise HTTPException(401, "Invalid credentials")
+        # All checks passed — create session (password already verified above)
+        token = await asyncio.to_thread(auth_manager.create_session_trusted, username)
        cookie_kwargs = dict(
            key=SESSION_COOKIE,
            value=token,
@@ -293,9 +302,30 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        if new_username in auth_manager.users:
            raise HTTPException(409, "Username already taken")

+        # Gate on auth first. Every mutation below is contingent on this
+        # succeeding — doing it last meant a rejected rename (e.g. reserved
+        # username) left file-backed owner fields already rewritten with no
+        # way to roll them back.
+        ok = auth_manager.rename_user(old_username, new_username, user)
+        if not ok:
+            raise HTTPException(400, "Cannot rename user")
+
+        def _rollback_auth_rename() -> bool:
+            # On self-rename the admin session has already moved to the new
+            # username, so the rollback must authenticate as the new user.
+            rollback_user = new_username if user == old_username else user
+            try:
+                return bool(auth_manager.rename_user(new_username, old_username, rollback_user))
+            except Exception as rollback_err:
+                logger.error(
+                    "Failed to roll back auth rename %s -> %s after owner migration failure: %s",
+                    new_username, old_username, rollback_err,
+                )
+                return False
+
        # Usernames are ownership keys for user data. Rename the common
-        # owner-scoped DB rows before changing auth so the account keeps
-        # access to its sessions, docs, email accounts, tasks, etc.
+        # owner-scoped DB rows so the account keeps access to its sessions,
+        # docs, email accounts, tasks, etc.
        try:
            from sqlalchemy import func
            from core.database import Base, SessionLocal
@@ -318,6 +348,11 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                db.close()
        except Exception as e:
            logger.error("Failed to rename owner references %s -> %s: %s", old_username, new_username, e)
+            if not _rollback_auth_rename():
+                logger.error(
+                    "Auth rename %s -> %s could not be rolled back after owner migration failure",
+                    old_username, new_username,
+                )
            raise HTTPException(500, "Failed to rename user data")

        # Per-user prefs are JSON-backed, not SQL-backed.
@@ -337,9 +372,116 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        except Exception as e:
            logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)

-        ok = auth_manager.rename_user(old_username, new_username, user)
-        if not ok:
-            raise HTTPException(400, "Cannot rename user")
+        # In-flight deep-research tasks live in the process-local
+        # ResearchHandler registry. They are not covered by the persisted JSON
+        # migration above, but the research routes filter and cancel by this
+        # owner field while the job is running. Do this before sweeping
+        # completed JSON files so a job that finishes during the rename saves
+        # with the new owner or is caught by the disk sweep below.
+        try:
+            rh = getattr(request.app.state, "research_handler", None)
+            rename_owner = getattr(rh, "rename_owner", None)
+            if callable(rename_owner):
+                rename_owner(old_username, new_username)
+        except Exception as e:
+            logger.warning("Failed to rename active research tasks %s -> %s: %s", old_username, new_username, e)
+
+        # deep_research: each completed report is a standalone JSON file with
+        # an `owner` field. research_routes filters by d.get("owner") == user,
+        # so a stale owner makes every report invisible to the renamed user.
+        try:
+            dr_dir = Path(DEEP_RESEARCH_DIR)
+            if dr_dir.is_dir():
+                for p in dr_dir.glob("*.json"):
+                    try:
+                        d = json.loads(p.read_text(encoding="utf-8"))
+                        if str(d.get("owner", "")).strip().lower() == old_username:
+                            d["owner"] = new_username
+                            atomic_write_json(str(p), d)
+                    except Exception as err:
+                        logger.warning("Failed to update research owner in %s: %s", p.name, err)
+        except Exception as e:
+            logger.warning("Failed to rename research owner references %s -> %s: %s", old_username, new_username, e)
+
+        # memory.json: a flat JSON array where each entry carries an `owner`
+        # field. memory_manager.load(owner=user) filters on it, so stale
+        # entries disappear from the memory panel.
+        try:
+            if os.path.isfile(MEMORY_FILE):
+                with open(MEMORY_FILE, encoding="utf-8") as fh:
+                    entries = json.loads(fh.read())
+                if isinstance(entries, list):
+                    changed = False
+                    for entry in entries:
+                        if isinstance(entry, dict) and str(entry.get("owner", "")).strip().lower() == old_username:
+                            entry["owner"] = new_username
+                            changed = True
+                    if changed:
+                        atomic_write_json(MEMORY_FILE, entries)
+        except Exception as e:
+            logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
+
+        # uploads.json: upload rows use owner metadata for access checks and
+        # owner-prefixed index keys for dedupe. Rename both so attachments keep
+        # resolving after the account username changes.
+        try:
+            upload_handler = getattr(request.app.state, "upload_handler", None)
+            rename_owner = getattr(upload_handler, "rename_owner", None)
+            if callable(rename_owner):
+                rename_owner(old_username, new_username)
+        except Exception as e:
+            logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
+
+        # skills: SKILL.md frontmatter carries owner: <username>; the usage
+        # sidecar (_usage.json) keys entries as owner::skill-name. Both must
+        # be updated or the renamed user's Skills panel goes empty.
+        try:
+            skills_root = Path(SKILLS_DIR)
+            if skills_root.is_dir():
+                _owner_re = re.compile(
+                    r'(?m)^(owner:\s*)' + re.escape(old_username) + r'\s*$',
+                    re.IGNORECASE,
+                )
+                for p in skills_root.rglob("SKILL.md"):
+                    try:
+                        text = p.read_text(encoding="utf-8")
+                        new_text = _owner_re.sub(r'\g<1>' + new_username, text)
+                        if new_text != text:
+                            atomic_write_text(str(p), new_text)
+                    except Exception as err:
+                        logger.warning("Failed to update skill owner in %s: %s", p, err)
+                usage_path = skills_root / "_usage.json"
+                if usage_path.is_file():
+                    try:
+                        usage = json.loads(usage_path.read_text(encoding="utf-8"))
+                        if isinstance(usage, dict):
+                            new_usage = {}
+                            changed = False
+                            for k, v in usage.items():
+                                owner_part, sep, skill_part = k.partition("::")
+                                if sep and owner_part.lower() == old_username:
+                                    new_usage[new_username + "::" + skill_part] = v
+                                    changed = True
+                                else:
+                                    new_usage[k] = v
+                            if changed:
+                                atomic_write_json(str(usage_path), new_usage)
+                    except Exception as err:
+                        logger.warning("Failed to update skills usage keys %s -> %s: %s", old_username, new_username, err)
+        except Exception as e:
+            logger.warning("Failed to rename skills owner references %s -> %s: %s", old_username, new_username, e)
+
+        # The in-memory session cache (session_manager.sessions) stores each
+        # session's owner at load time. Without this patch the renamed user's
+        # sessions are invisible on the next /api/sessions call because
+        # get_sessions_for_user does an exact `s.owner == username` comparison
+        # against stale in-memory values.
+        sm = getattr(request.app.state, "session_manager", None)
+        if sm is not None:
+            for sess in list(getattr(sm, "sessions", {}).values()):
+                if str(getattr(sess, "owner", None) or "").strip().lower() == old_username:
+                    sess.owner = new_username
+
        # The owner-rename loop above updated ApiToken.owner in the DB, but the
        # bearer-token cache still maps each token to the OLD owner. Without
        # refreshing it, the renamed user's API tokens resolve to the old (now
@@ -350,6 +492,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            invalidator()
        return {"ok": True, "username": new_username, "renamed_self": old_username == user}

+    @router.put("/users/{username}/admin")
+    async def set_user_admin(username: str, body: SetAdminRequest, request: Request):
+        """Grant or revoke admin rights for ``username``. Admin only.
+
+        Demoting the last remaining admin is refused (no lockout). An admin
+        may demote themselves while another admin exists; the `self` flag in
+        the response tells the UI to reload the acting user into the
+        normal-user view.
+        """
+        user = _get_current_user(request)
+        if not (user and auth_manager.is_admin(user)):
+            raise HTTPException(403, "Admin only")
+
+        outcome = auth_manager.set_admin(username, body.is_admin, user)
+        # Failure outcomes mapped to (status, detail), checked in a fixed order.
+        rejections = (
+            (SetAdminResult.USER_NOT_FOUND, 404, "User not found"),
+            (SetAdminResult.NOT_AUTHORIZED, 403, "Admin only"),
+            (SetAdminResult.LAST_ADMIN, 400, "Cannot demote the last admin"),
+        )
+        for failure, status, detail in rejections:
+            if outcome is failure:
+                raise HTTPException(status, detail)
+
+        # Compare normalised (trimmed, lower-cased) names to detect self-edits.
+        target = (username or "").strip().lower()
+        acting = (user or "").strip().lower()
+        return {
+            "ok": True,
+            "is_admin": body.is_admin,
+            "self": target == acting,
+        }
+
    @router.post("/signup-toggle", deprecated=True)
    async def toggle_signup(request: Request):
        """
@@ -380,7 +547,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        user = _get_current_user(request)
        if not user or not auth_manager.is_admin(user):
            raise HTTPException(403, "Admin only")
-        ok = auth_manager.delete_user(body.username, user)
+
+        def _invalidate_api_token_cache():
+            """Flag the bearer-token cache dirty, if the app exposes a hook.
+
+            Best-effort: never raises, so it cannot change the outcome of the
+            delete request that calls it.
+            """
+            try:
+                invalidator = getattr(request.app.state, "invalidate_token_cache", None)
+                if invalidator:
+                    invalidator()
+            except Exception:
+                # Keep going, but leave a trace: a failed invalidation means
+                # already-cached tokens may keep authenticating.
+                logger.warning(
+                    "Failed to invalidate API token cache during user deletion",
+                    exc_info=True,
+                )
+
+        try:
+            ok = auth_manager.delete_user(body.username, user)
+        except Exception:
+            # delete_user can touch ApiToken rows before a later auth-store write
+            # fails. Dirty the bearer cache anyway so a partial token purge does
+            # not leave already-cached tokens authenticating until restart.
+            _invalidate_api_token_cache()
+            raise
        if not ok:
            raise HTTPException(400, "Cannot delete user")
        # delete_user removes the user's ApiToken rows, but the bearer-auth
@@ -388,12 +571,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        # rebuilds when flagged dirty. Without this, a deleted user's already
        # cached token keeps authenticating until some other token op or a
        # restart clears the cache. Mirror what the token routes do.
-        try:
-            invalidator = getattr(request.app.state, "invalidate_token_cache", None)
-            if invalidator:
-                invalidator()
-        except Exception:
-            pass
+        _invalidate_api_token_cache()
        return {"ok": True}

    # ---- Feature visibility (admin-managed) ----
@@ -585,6 +763,27 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                    hint = " If this is Docker Compose ntfy, set NTFY_BIND to that host/Tailscale IP and NTFY_BASE_URL to the same server URL in .env, then recreate ntfy."
                return {"ok": False, "message": f"ntfy publish to {full_url} failed: {e}.{hint}"[:500]}

+        if preset == "discord_webhook":
+            import httpx
+            webhook_url = (integ.get("base_url") or "").strip()
+            if not webhook_url:
+                return {"ok": False, "message": "No webhook URL set — paste the full Discord webhook URL into the Base URL field."}
+            payload = {
+                "embeds": [{
+                    "title": "Odysseus connectivity test",
+                    "description": "If you see this, your Discord Webhook integration is wired up correctly.",
+                    "color": 5793266,
+                }]
+            }
+            try:
+                async with httpx.AsyncClient(timeout=8.0) as client:
+                    r = await client.post(webhook_url, json=payload)
+                if r.is_success:
+                    return {"ok": True, "message": "Test embed sent — check your Discord channel to confirm it arrived."}
+                return {"ok": False, "message": f"Discord returned HTTP {r.status_code}: {r.text[:200]}"}
+            except Exception as e:
+                return {"ok": False, "message": f"Request failed: {e}"[:400]}
+
        # All other presets: GET against a known health endpoint.
        # Fall back to detecting from name if preset is missing.
        health_paths = {
@@ -101,24 +101,74 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
        # ── Skills ──
        if "skills" in body and isinstance(body["skills"], list):
            existing = skills_manager.load_all()
-            existing_ids = {s.get("id") for s in existing}
-            existing_titles = {s.get("title", "").strip().lower() for s in existing}
+            # Dedup against THIS user's own skills only. Using every tenant's
+            # rows (load_all) meant a skill whose id/name/title matched any
+            # other user's was silently skipped, so the importing user lost
+            # their own data — same cross-tenant bug fixed for memories above.
+            # The full store is still saved back below.
+            own = [s for s in existing if s.get("owner") == user]
+            existing_names = {s.get("name") for s in own if s.get("name")}
+            existing_ids = {s.get("id") for s in own if s.get("id")}
+            existing_titles = {
+                (s.get("title") or s.get("description") or "").strip().lower()
+                for s in own
+            }
            added = 0
            for skill in body["skills"]:
-                if not isinstance(skill, dict) or not skill.get("title"):
+                if not isinstance(skill, dict):
                    continue
-                # Skip if same id or same title already exists
-                if skill.get("id") in existing_ids:
+                title = (
+                    skill.get("title") or skill.get("description")
+                    or skill.get("name") or ""
+                ).strip()
+                if not title:
                    continue
-                if skill["title"].strip().lower() in existing_titles:
+                sid = skill.get("id") or skill.get("name")
+                if sid and sid in existing_ids:
                    continue
-                if user and not skill.get("owner"):
-                    skill["owner"] = user
-                existing.append(skill)
-                existing_ids.add(skill.get("id"))
-                existing_titles.add(skill["title"].strip().lower())
+                nm = skill.get("name")
+                if nm and nm in existing_names:
+                    continue
+                if title.lower() in existing_titles:
+                    continue
+                owner = skill.get("owner")
+                if user and not owner:
+                    owner = user
+                # Skills live on disk as SKILL.md files; the old JSON-era
+                # skills_manager.save() no longer exists. Write each new skill
+                # via add_skill (source="user" skips auto-dedup — this is an
+                # explicit backup restore).
+                result = skills_manager.add_skill(
+                    title=title,
+                    name=skill.get("name"),
+                    description=skill.get("description"),
+                    problem=skill.get("problem", ""),
+                    solution=skill.get("solution", ""),
+                    steps=skill.get("steps"),
+                    tags=skill.get("tags"),
+                    source="user",
+                    teacher_model=skill.get("teacher_model"),
+                    confidence=skill.get("confidence", 0.8),
+                    owner=owner,
+                    category=skill.get("category", "general"),
+                    when_to_use=skill.get("when_to_use"),
+                    procedure=skill.get("procedure"),
+                    pitfalls=skill.get("pitfalls"),
+                    verification=skill.get("verification"),
+                    platforms=skill.get("platforms"),
+                    requires_toolsets=skill.get("requires_toolsets"),
+                    fallback_for_toolsets=skill.get("fallback_for_toolsets"),
+                    status=skill.get("status", "draft"),
+                    version=skill.get("version", "1.0.0"),
+                )
+                if result.get("_deduped"):
+                    continue
+                if result.get("name"):
+                    existing_names.add(result["name"])
+                if result.get("id"):
+                    existing_ids.add(result["id"])
+                existing_titles.add(title.lower())
                added += 1
-            skills_manager.save(existing)
            imported.append(f"{added} skills")

        # ── Presets ──
@@ -1,6 +1,7 @@
 """Calendar routes — local SQLite-backed calendar CRUD."""

 import logging
+import re
 import uuid
 from datetime import datetime, date, timedelta
 from typing import Optional, List
@@ -10,9 +11,9 @@ from pydantic import BaseModel
 from sqlalchemy import or_, and_
 from dateutil.rrule import rrulestr

-from core.database import SessionLocal, CalendarCal, CalendarEvent
+from core.database import SessionLocal, CalendarCal, CalendarDeletedEvent, CalendarEvent
 from src.auth_helpers import require_user
-from src.upload_limits import read_upload_limited
+from src.upload_limits import read_upload_limited, ICS_MAX_BYTES

 logger = logging.getLogger(__name__)

@@ -100,6 +101,15 @@ def _ics_escape(text: str) -> str:
    )


+def _safe_ics_filename(name: str) -> str:
+    """Return a conservative .ics filename safe for Content-Disposition.
+
+    Every character outside ``[A-Za-z0-9._-]`` becomes ``_``, leading and
+    trailing ``.``/``_``/``-`` are removed, and the stem is capped at 128
+    characters. Non-string or empty input falls back to ``calendar``.
+    """
+    stem = name if isinstance(name, str) else ""
+    stem = re.sub(r"[^A-Za-z0-9._-]", "_", stem).strip("._-")
+    # Strip again after truncating: the cut can land right after a separator,
+    # which would otherwise yield names such as "foo..ics".
+    stem = stem[:128].rstrip("._-")
+    if not stem:
+        stem = "calendar"
+    return f"{stem}.ics"
+
+
 def _resolve_base_uid(uid: str) -> str:
    """Extract the base series UID from a compound occurrence UID.

@@ -116,6 +126,54 @@ def _resolve_base_uid(uid: str) -> str:
        raise ValueError("malformed compound UID: missing base before ::")
    return base

+
+async def _push_caldav_event_after_commit(owner: str, uid: str, action: str):
+    """Best-effort CalDAV write-through. Local writes stay authoritative if
+    the remote server is unreachable; pending flags let /sync retry later.
+
+    ``action`` selects the ``src.caldav_sync`` helper to call: "create",
+    "update" or "delete". Any other value is a no-op. A push that raises, or
+    that returns a result which is neither ``ok`` nor ``skipped``, is logged
+    and never propagated. For failed creates/updates the event's
+    ``caldav_sync_pending`` is set to the action; failed deletes are only
+    logged here.
+    """
+    try:
+        result = {"ok": True}
+        if action == "create":
+            from src.caldav_sync import push_event_create
+            result = await push_event_create(owner, uid)
+        elif action == "update":
+            from src.caldav_sync import push_event_update
+            result = await push_event_update(owner, uid)
+        elif action == "delete":
+            from src.caldav_sync import push_event_delete
+            result = await push_event_delete(owner, uid)
+        # Turn a reported (non-skipped) failure into an exception so both
+        # failure modes share the handler below.
+        if result and not result.get("ok") and not result.get("skipped"):
+            raise RuntimeError(result.get("error") or result)
+    except Exception as e:
+        logger.warning("CalDAV %s push failed for uid=%s: %s", action, uid, e)
+        if action in {"create", "update"}:
+            db = SessionLocal()
+            try:
+                ev = _get_or_404_event(db, uid, owner)
+                ev.caldav_sync_pending = action
+                db.commit()
+            except Exception:
+                db.rollback()
+                # The later retry relies on the pending flag, so do not lose
+                # this failure silently.
+                logger.warning(
+                    "Could not mark CalDAV %s as pending for uid=%s",
+                    action, uid, exc_info=True,
+                )
+            finally:
+                db.close()
+
+
+def _record_caldav_delete_tombstone(db, ev: CalendarEvent, owner: str) -> None:
+    """Create or refresh the deletion tombstone for a CalDAV-backed event.
+
+    Does nothing unless the event's calendar has ``source == "caldav"``. The
+    tombstone is keyed by (uid, owner); it is added to ``db`` when missing and
+    its remote fields are overwritten from ``ev``. The caller commits.
+    """
+    calendar = ev.calendar
+    if not calendar or calendar.source != "caldav":
+        return
+
+    row = (
+        db.query(CalendarDeletedEvent)
+        .filter(
+            CalendarDeletedEvent.uid == ev.uid,
+            CalendarDeletedEvent.owner == owner,
+        )
+        .first()
+    )
+    if not row:
+        row = CalendarDeletedEvent(uid=ev.uid, owner=owner)
+        db.add(row)
+
+    # Snapshot what is needed to address the remote copy of the event.
+    snapshot = {
+        "calendar_id": ev.calendar_id,
+        "remote_href": ev.remote_href,
+        "remote_etag": ev.remote_etag,
+        "caldav_base_url": getattr(ev.calendar, "caldav_base_url", None),
+        "summary": ev.summary or "",
+        "last_error": None,
+    }
+    for field, value in snapshot.items():
+        setattr(row, field, value)
+
 # ── Pydantic models ──

 class EventCreate(BaseModel):
@@ -248,6 +306,17 @@ def parse_due_for_user(s: str) -> str:
        if t is not None:
            return base.replace(hour=t[0], minute=t[1]).isoformat()

+    # Time-first: "3pm today", "11pm today", "9am tomorrow"
+    m = _re.match(r'^(.+?)\s+(today|tonight|tomorrow|tmrw|yesterday)$', lower)
+    if m:
+        time_part, word = m.group(1).strip(), m.group(2)
+        base = today
+        if word in ("tomorrow", "tmrw"): base = today + _td(days=1)
+        elif word == "yesterday":        base = today - _td(days=1)
+        t = _parse_time(time_part)
+        if t is not None:
+            return base.replace(hour=t[0], minute=t[1]).isoformat()
+
    m = _re.match(r'^in\s+(\d+)\s*(hour|hr|minute|min|day)s?\s*$', lower)
    if m:
        n = int(m.group(1)); unit = m.group(2)
@@ -399,7 +468,17 @@ def _parse_dt(s: str) -> datetime:
    # Last resort: dateutil's fuzzy parser
    try:
        from dateutil import parser as _du
-        return _du.parse(s)
+        parsed = _du.parse(s)
+        # Strip tz like every other return path above — this function's
+        # contract is naive datetimes (CalendarEvent.dtstart is naive). An
+        # offset-bearing non-ISO input (e.g. RFC-2822 "Mon, 05 Jan 2026
+        # 14:00:00 +0900") otherwise leaked tz-aware into the naive column and
+        # crashed read-back comparisons in _expand_rrule with "can't compare
+        # offset-naive and offset-aware datetimes".
+        if parsed.tzinfo is not None:
+            from datetime import timezone as _tz
+            return parsed.astimezone(_tz.utc).replace(tzinfo=None)
+        return parsed
    except Exception:
        raise ValueError(f"could not parse datetime: {s!r}")

@@ -440,6 +519,9 @@ def _event_to_dict(ev: CalendarEvent) -> dict:

 # ── Recurrence expansion ──

+# Maximum number of occurrences _expand_rrule collects for one series in a
+# single query window; when the cap is hit, expansion stops and every returned
+# occurrence is flagged "truncated".
+_RRULE_EXPANSION_LIMIT = 1000
+
+
 def _expand_rrule(
    ev: CalendarEvent, start: datetime, end: datetime
 ) -> List[dict]:
@@ -462,6 +544,7 @@ def _expand_rrule(
        d = _event_to_dict(ev)
        d["is_recurrence"] = False
        d["series_uid"] = ev.uid
+        d["truncated"] = False
        return [d]

    # Parse the rrule, applying it to the base dtstart.
@@ -487,6 +570,7 @@ def _expand_rrule(
        d = _event_to_dict(ev)
        d["is_recurrence"] = False
        d["series_uid"] = ev.uid
+        d["truncated"] = False
        # Malformed RRULE rows are fetched by the recurring SQL branch
        # with only dtstart < end_dt — the base event may not actually
        # overlap the window. Only return if it does.
@@ -499,22 +583,26 @@ def _expand_rrule(
    # (matching non-recurring overlap semantics: dtstart < end AND
    # dtend > start).
    expand_start = start - duration
-    occurrences = rule.between(expand_start, end, inc=True)
-    if not occurrences:
-        return []
-
    results = []
+    truncated = False
    base = _event_to_dict(ev)

-    for occ_start in occurrences:
+    for occ_start in rule.xafter(expand_start, inc=True):
+        if occ_start >= end:
+            break
+
        occ_end = occ_start + duration

        # Overlap filter: occurrence must intersect [start, end).
        # This enforces exclusive-end semantics (occ_start >= end is
        # excluded) and includes multi-day crossings (occ_end > start).
-        if occ_start >= end or occ_end <= start:
+        if occ_end <= start:
            continue

+        if len(results) >= _RRULE_EXPANSION_LIMIT:
+            truncated = True
+            break
+
        # Build the compound uid: {base_uid}::{date} or ::{datetime}
        if ev.all_day:
            occ_uid = f"{ev.uid}::{occ_start.strftime('%Y-%m-%d')}"
@@ -525,6 +613,7 @@ def _expand_rrule(
        d["uid"] = occ_uid
        d["series_uid"] = ev.uid
        d["is_recurrence"] = True
+        d["truncated"] = False

        if ev.all_day:
            d["dtstart"] = occ_start.strftime("%Y-%m-%d")
@@ -537,6 +626,10 @@ def _expand_rrule(

        results.append(d)

+    if truncated:
+        for d in results:
+            d["truncated"] = True
+
    return results


@@ -545,72 +638,178 @@ def _expand_rrule(
 def setup_calendar_routes() -> APIRouter:
    router = APIRouter(prefix="/api/calendar", tags=["calendar"])

-    # CalDAV connect form (Integrations → Calendar). Storage is local
-    # SQLite; sync (src/caldav_sync.py) pulls remote events into it on
-    # calendar open and periodically via the scheduler.
+    # ── CalDAV multi-account helpers ─────────────────────────────────────────
+
+    def _get_caldav_accounts(owner: str) -> list:
+        """Return the CalDAV accounts that src.caldav_sync loads for ``owner``."""
+        # Imported at call time, like the other src.caldav_sync uses here.
+        import src.caldav_sync as _caldav_sync
+        return _caldav_sync._load_caldav_accounts(owner)
+
+    def _save_caldav_accounts(owner: str, accounts: list) -> None:
+        """Store ``accounts`` under the owner's ``caldav_accounts`` pref.
+
+        The single-account ``caldav`` key is removed in the same write.
+        """
+        from routes.prefs_routes import _load_for_user, _save_for_user
+        stored = _load_for_user(owner) or {}
+        stored.pop("caldav", None)
+        stored["caldav_accounts"] = accounts
+        _save_for_user(owner, stored)
+
+    # ── CalDAV config routes (backward-compat single-account API) ────────────
+
    @router.get("/config")
    async def get_config(request: Request):
+        """Legacy single-account endpoint — returns the first configured account."""
        owner = _require_user(request)
-        from routes.prefs_routes import _load_for_user
-        cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
-        caldav_password = cfg.get("password") or ""
-        if caldav_password:
+        accounts = _get_caldav_accounts(owner)
+        if not accounts:
+            return {"url": "", "username": "", "password": "", "has_password": False, "local": True}
+        first = accounts[0]
+        pw = first.get("password") or ""
+        has_pw = False
+        if pw:
            try:
                from src.secret_storage import decrypt
-                caldav_password = decrypt(caldav_password)
+                has_pw = bool(decrypt(pw))
            except Exception:
-                pass
-        # Surface url+username but never hand the password back to the
-        # client — saved-state UI shouldn't leak the credential.
+                has_pw = bool(pw)
        return {
-            "url": cfg.get("url", "") or "",
-            "username": cfg.get("username", "") or "",
+            "url": first.get("url", "") or "",
+            "username": first.get("username", "") or "",
            "password": "",
-            "has_password": bool(caldav_password),
-            "local": not bool(cfg.get("url")),
+            "has_password": has_pw,
+            "local": not bool(first.get("url")),
        }

    @router.post("/config")
    async def save_config(request: Request):
+        """Legacy single-account endpoint — upserts the first account."""
        owner = _require_user(request)
-        from routes.prefs_routes import _load_for_user, _save_for_user
        try:
            body = await request.json()
        except Exception:
            body = {}
-        prefs = _load_for_user(owner) or {}
-        cfg = dict(prefs.get("caldav") or {})
-        # Empty url => clear the whole entry (treat as "remove integration").
+        accounts = _get_caldav_accounts(owner)
        if not (body.get("url") or "").strip():
-            prefs.pop("caldav", None)
-            _save_for_user(owner, prefs)
+            _save_caldav_accounts(owner, [])
            return {"ok": True, "cleared": True}
        from src.caldav_sync import validate_caldav_url
        try:
-            cfg["url"] = validate_caldav_url(body.get("url", ""))
+            validated_url = validate_caldav_url(body.get("url", ""))
        except ValueError as e:
            raise HTTPException(400, str(e))
-        cfg["username"] = (body.get("username") or "").strip()
-        # Preserve the stored password when the client sends an empty
-        # one (edit form re-submitted without re-typing the password).
-        # cfg already holds the existing (already-encrypted) password from
-        # prefs, so we only touch it when a new password is supplied —
-        # re-encrypting the stored value would double-encrypt it.
+        if accounts:
+            acc = dict(accounts[0])
+        else:
+            import uuid as _uuid
+            acc = {"id": str(_uuid.uuid4()), "label": "CalDAV"}
+        acc["url"] = validated_url
+        acc["username"] = (body.get("username") or "").strip()
        if body.get("password"):
            from src.secret_storage import encrypt
-            cfg["password"] = encrypt(body["password"])
-        prefs["caldav"] = cfg
-        _save_for_user(owner, prefs)
+            acc["password"] = encrypt(body["password"])
+        new_accounts = [acc] + (accounts[1:] if len(accounts) > 1 else [])
+        _save_caldav_accounts(owner, new_accounts)
+        return {"ok": True}
+
+    # ── CalDAV multi-account CRUD ─────────────────────────────────────────────
+
+    @router.get("/config/accounts")
+    async def list_caldav_accounts(request: Request):
+        """List every CalDAV account saved for the caller.
+
+        Stored passwords are never returned; each entry only carries a
+        ``has_password`` flag.
+        """
+        owner = _require_user(request)
+
+        def _password_is_set(stored) -> bool:
+            # Nothing stored -> False. Otherwise report whether the decrypted
+            # value is non-empty; if decryption raises, the non-empty stored
+            # value itself counts as a password.
+            if not stored:
+                return False
+            try:
+                from src.secret_storage import decrypt
+                return bool(decrypt(stored))
+            except Exception:
+                return bool(stored)
+
+        return {
+            "accounts": [
+                {
+                    "id": acc.get("id", ""),
+                    "label": acc.get("label", "") or acc.get("url", ""),
+                    "url": acc.get("url", "") or "",
+                    "username": acc.get("username", "") or "",
+                    "has_password": _password_is_set(acc.get("password")),
+                }
+                for acc in _get_caldav_accounts(owner)
+            ]
+        }
+
+    @router.post("/config/accounts")
+    async def add_caldav_account(request: Request):
+        """Add a new CalDAV account for the caller.
+
+        Expects a JSON object with ``url`` (must pass ``validate_caldav_url``)
+        and ``password`` (required), plus optional ``label`` and ``username``.
+        The password is encrypted before it is stored.
+
+        Returns ``{"ok": True, "id": <new account id>}``.
+        Raises HTTPException(400) for a rejected URL or a missing password.
+        """
+        import uuid as _uuid
+        owner = _require_user(request)
+        try:
+            body = await request.json()
+        except Exception:
+            body = {}
+        if not isinstance(body, dict):
+            # request.json() returns whatever the JSON decodes to; a list,
+            # string, number or null has no .get() and would surface as an
+            # HTTP 500. Treat it like an unparseable body instead.
+            body = {}
+        from src.caldav_sync import validate_caldav_url
+        try:
+            url = validate_caldav_url(body.get("url", ""))
+        except ValueError as e:
+            raise HTTPException(400, str(e))
+        if not body.get("password"):
+            raise HTTPException(400, "Password is required")
+        from src.secret_storage import encrypt
+        new_acc = {
+            "id": str(_uuid.uuid4()),
+            "label": (body.get("label") or "").strip() or "CalDAV",
+            "url": url,
+            "username": (body.get("username") or "").strip(),
+            "password": encrypt(body["password"]),
+        }
+        accounts = _get_caldav_accounts(owner)
+        accounts.append(new_acc)
+        _save_caldav_accounts(owner, accounts)
+        return {"ok": True, "id": new_acc["id"]}
+
+    @router.put("/config/accounts/{account_id}")
+    async def update_caldav_account(account_id: str, request: Request):
+        """Update an existing CalDAV account by id.
+
+        Only fields supplied in the JSON object are changed: ``url`` (when
+        non-empty, re-validated), ``label`` and ``username`` (when not null),
+        ``password`` (when non-empty, re-encrypted). Omitting the password
+        keeps the stored one.
+
+        Raises HTTPException(404) for an unknown id and HTTPException(400)
+        for a rejected URL.
+        """
+        owner = _require_user(request)
+        try:
+            body = await request.json()
+        except Exception:
+            body = {}
+        if not isinstance(body, dict):
+            # A JSON list/string/number/null has no .get(); without this
+            # guard it would surface as an HTTP 500. Treat it like an
+            # unparseable body instead.
+            body = {}
+        accounts = _get_caldav_accounts(owner)
+        idx = next((i for i, a in enumerate(accounts) if a.get("id") == account_id), None)
+        if idx is None:
+            raise HTTPException(404, "Account not found")
+        # Work on a copy so a validation failure leaves the loaded entry intact.
+        acc = dict(accounts[idx])
+        if body.get("url"):
+            from src.caldav_sync import validate_caldav_url
+            try:
+                acc["url"] = validate_caldav_url(body["url"])
+            except ValueError as e:
+                raise HTTPException(400, str(e))
+        if body.get("label") is not None:
+            acc["label"] = (body.get("label") or "").strip() or "CalDAV"
+        if body.get("username") is not None:
+            acc["username"] = (body.get("username") or "").strip()
+        if body.get("password"):
+            from src.secret_storage import encrypt
+            acc["password"] = encrypt(body["password"])
+        accounts[idx] = acc
+        _save_caldav_accounts(owner, accounts)
+        return {"ok": True}
+
+    @router.delete("/config/accounts/{account_id}")
+    async def delete_caldav_account(account_id: str, request: Request):
+        """Delete the caller's CalDAV account with the given id (404 if absent)."""
+        owner = _require_user(request)
+        existing = _get_caldav_accounts(owner)
+        # No entry carries this id (or there are no accounts at all).
+        if all(entry.get("id") != account_id for entry in existing):
+            raise HTTPException(404, "Account not found")
+        remaining = [entry for entry in existing if entry.get("id") != account_id]
+        _save_caldav_accounts(owner, remaining)
        return {"ok": True}

    @router.post("/test")
    async def test_connection(request: Request):
-        """Actually probe the configured CalDAV server with a PROPFIND
-        request (the same handshake every CalDAV client uses). Accepts
-        an optional {url, username, password} body so the user can test
-        a configuration BEFORE saving it; falls back to the stored
-        creds otherwise. Returns {ok, error?} with a useful message on
-        failure (status code, auth issue, network error)."""
+        """Probe a CalDAV server with a PROPFIND. Accepts an optional body:
+        {url, username, password} to test before saving, or {account_id} to
+        test an already-saved account. Falls back to the first saved account
+        when nothing is provided."""
        owner = _require_user(request)
        try:
            body = await request.json()
@@ -620,19 +819,24 @@ def setup_calendar_routes() -> APIRouter:
        user = (body.get("username") or "").strip()
        pw = body.get("password") or ""
        if not (url and user and pw):
-            # Fall back to saved settings for this user.
-            from routes.prefs_routes import _load_for_user
-            cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
-            url = url or (cfg.get("url") or "")
-            user = user or (cfg.get("username") or "")
-            if not pw:
-                pw = cfg.get("password") or ""
-                if pw:
-                    try:
-                        from src.secret_storage import decrypt
-                        pw = decrypt(pw)
-                    except Exception:
-                        pass
+            # Look up a saved account: by id if supplied, else first account.
+            accounts = _get_caldav_accounts(owner)
+            acc = None
+            if body.get("account_id"):
+                acc = next((a for a in accounts if a.get("id") == body["account_id"]), None)
+            if acc is None and accounts:
+                acc = accounts[0]
+            if acc:
+                url = url or (acc.get("url") or "")
+                user = user or (acc.get("username") or "")
+                if not pw:
+                    pw = acc.get("password") or ""
+                    if pw:
+                        try:
+                            from src.secret_storage import decrypt
+                            pw = decrypt(pw)
+                        except Exception:
+                            pass
        if not (url and user and pw):
            return {"ok": False, "error": "Missing URL, username, or password"}
        from src.caldav_sync import validate_caldav_url
@@ -687,13 +891,34 @@ def setup_calendar_routes() -> APIRouter:
            return {"ok": False, "error": str(e)[:200]}

    @router.post("/sync")
-    async def sync_caldav_endpoint(request: Request):
-        """Pull events from the configured CalDAV server into local DB.
+    async def sync_caldav_endpoint(request: Request, direction: str = "pull"):
+        """Sync events with the configured CalDAV server.
        Returns counts + any per-calendar errors. Called by the frontend
        on calendar open and by the periodic scheduler loop."""
        owner = _require_user(request)
-        from src.caldav_sync import sync_caldav
-        return await sync_caldav(owner)
+        from src.caldav_sync import sync_caldav_direction
+        return await sync_caldav_direction(owner, direction)
+
+
+    @router.delete("/calendars/{cal_id}")
+    async def delete_calendar(request: Request, cal_id: str):
+        """Delete the calendar ``cal_id`` and every event stored under it.
+
+        The calendar is fetched through ``_get_or_404_calendar`` using the
+        owner returned by ``_require_user``; its ``CalendarEvent`` rows are
+        bulk-deleted first, then the calendar row, in a single commit.
+
+        Returns:
+            ``{"ok": True}`` once the commit succeeds.
+
+        Raises:
+            HTTPException: propagated unchanged when raised inside the
+                ``try``; any other failure is rolled back, logged with its
+                traceback and surfaced as a generic 500.
+        """
+        owner = _require_user(request)
+        db = SessionLocal()
+        try:
+            cal = _get_or_404_calendar(db, cal_id, owner)
+            db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
+            db.delete(cal)
+            db.commit()
+            return {"ok": True}
+        except HTTPException:
+            raise
+        except Exception as e:
+            db.rollback()
+            # exc_info keeps the traceback in the server log; the client only
+            # ever sees the generic message raised below.
+            logger.error("Failed to delete calendar %s: %s", cal_id, e, exc_info=True)
+            raise HTTPException(500, "Failed to delete calendar") from e
+        finally:
+            db.close()
+

    @router.get("/calendars")
    async def list_calendars(request: Request):
@@ -703,7 +928,7 @@ def setup_calendar_routes() -> APIRouter:
            _ensure_default_calendar(db, owner)
            cals = db.query(CalendarCal).filter(CalendarCal.owner == owner).all()
            return {"calendars": [
-                {"name": c.name, "href": c.id, "color": c.color}
+                {"name": c.name, "href": c.id, "color": c.color, "source": c.source}
                for c in cals
            ]}
        except HTTPException:
@@ -766,8 +991,12 @@ def setup_calendar_routes() -> APIRouter:
                expanded.extend(_expand_rrule(e, start_dt, end_dt))

            # Sort by occurrence start time for consistent frontend ordering.
+            truncated = any(e.get("truncated") for e in expanded)
            expanded.sort(key=lambda d: d["dtstart"])
-            return {"events": expanded}
+            response: dict = {"events": expanded}
+            if truncated:
+                response["truncated"] = True
+            return response
        except HTTPException:
            raise
        except Exception as e:
@@ -821,19 +1050,12 @@ def setup_calendar_routes() -> APIRouter:
                is_utc=_is_utc and not data.all_day,
                rrule=data.rrule or "",
                color=data.color or None,
+                caldav_sync_pending="create" if cal.source == "caldav" else None,
            )
            db.add(ev)
            db.commit()
            if cal.source == "caldav":
-                # Push the new event to the remote so it appears on the user's
-                # other devices — the sync is otherwise pull-only (#800).
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, cal.source, cal.id, {
-                    "uid": uid, "summary": data.summary, "description": data.description,
-                    "location": data.location, "dtstart": dtstart, "dtend": dtend,
-                    "all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
-                    "rrule": data.rrule or "",
-                })
+                await _push_caldav_event_after_commit(owner, uid, "create")
            return {"ok": True, "uid": uid}
        except HTTPException:
            raise
@@ -879,15 +1101,12 @@ def setup_calendar_routes() -> APIRouter:
                ev.rrule = data.rrule
            if data.color is not None:
                ev.color = data.color if data.color else None
+            is_caldav = ev.calendar and ev.calendar.source == "caldav"
+            if is_caldav:
+                ev.caldav_sync_pending = "update"
            db.commit()
-            cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
-            if cal and cal.source == "caldav":
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, cal.source, cal.id, {
-                    "uid": ev.uid, "summary": ev.summary, "description": ev.description,
-                    "location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
-                    "all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
-                })
+            if is_caldav:
+                await _push_caldav_event_after_commit(owner, base_uid, "update")
            return {"ok": True}
        except HTTPException:
            raise
@@ -908,15 +1127,13 @@ def setup_calendar_routes() -> APIRouter:
        db = SessionLocal()
        try:
            ev = _get_or_404_event(db, base_uid, owner)
-            # Capture what the remote push needs BEFORE the row is gone.
-            _cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
-            _is_caldav = bool(_cal and _cal.source == "caldav")
-            _cal_id, _ev_uid = ev.calendar_id, ev.uid
+            is_caldav = ev.calendar and ev.calendar.source == "caldav"
+            if is_caldav:
+                _record_caldav_delete_tombstone(db, ev, owner)
            db.delete(ev)
            db.commit()
-            if _is_caldav:
-                from src.caldav_writeback import writeback_event
-                await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
+            if is_caldav:
+                await _push_caldav_event_after_commit(owner, base_uid, "delete")
            return {"ok": True}
        except HTTPException:
            raise
@@ -970,27 +1187,10 @@ def setup_calendar_routes() -> APIRouter:
        finally:
            db.close()

-    @router.delete("/calendars/{cal_id}")
-    async def delete_calendar(request: Request, cal_id: str):
-        owner = _require_user(request)
-        db = SessionLocal()
-        try:
-            cal = _get_or_404_calendar(db, cal_id, owner)
-            db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
-            db.delete(cal)
-            db.commit()
-            return {"ok": True}
-        except HTTPException:
-            raise
-        except Exception as e:
-            db.rollback()
-            return {"error": str(e)}
-        finally:
-            db.close()

-    # 10 MB hard cap on ICS upload. Loading the whole file into memory is
-    # unavoidable with python-icalendar, so an unbounded upload would OOM.
-    _ICS_MAX_BYTES = 10 * 1024 * 1024
+    # Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole
+    # file into memory is unavoidable with python-icalendar, so an unbounded
+    # upload would OOM.

    @router.post("/import")
    async def import_ics(request: Request, file: UploadFile = File(...), calendar_name: str = ""):
@@ -1000,7 +1200,7 @@ def setup_calendar_routes() -> APIRouter:
        owner = _require_user(request)
        db = SessionLocal()
        try:
-            content = await read_upload_limited(file, _ICS_MAX_BYTES, "ICS file")
+            content = await read_upload_limited(file, ICS_MAX_BYTES, "ICS file")
            try:
                cal_data = iCal.from_ical(content)
            except Exception as e:
@@ -1168,11 +1368,14 @@ def setup_calendar_routes() -> APIRouter:
            lines.append("END:VCALENDAR")

            ics_data = "\r\n".join(lines)
-            safe_name = cal.name.replace(" ", "_").replace("/", "_")
+            download_name = _safe_ics_filename(cal.name)
            return Response(
                content=ics_data,
                media_type="text/calendar",
-                headers={"Content-Disposition": f'attachment; filename="{safe_name}.ics"'},
+                headers={
+                    "Content-Disposition": f'attachment; filename="{download_name}"',
+                    "X-Content-Type-Options": "nosniff",
+                },
            )
        except HTTPException:
            raise
@@ -1194,7 +1397,7 @@ def setup_calendar_routes() -> APIRouter:
        "tomorrow", "next Tuesday", "in 30 minutes" resolve correctly.
        Uses the "utility" endpoint (small / fast model) to keep latency low.
        """
-        _require_user(request)
+        owner = _require_user(request)
        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async
        from src.text_helpers import strip_think
@@ -1220,9 +1423,9 @@ def setup_calendar_routes() -> APIRouter:
        if tz_hint:
            set_user_tz_name(tz_hint)

-        url, model, headers = resolve_endpoint("utility")
+        url, model, headers = resolve_endpoint("utility", owner=owner or None)
        if not url:
-            url, model, headers = resolve_endpoint("default")
+            url, model, headers = resolve_endpoint("default", owner=owner or None)
        if not url or not model:
            return {"ok": False, "error": "No LLM endpoint configured"}

@@ -75,7 +75,7 @@ def _enforce_chat_privileges(request, sess) -> None:
    allowlist, or HTTPException(429) if the user has hit their daily message
    cap. No-op for unauthenticated callers or when auth_manager is absent
    (single-user mode). Admins receive ADMIN_PRIVILEGES from get_privileges,
-    which means empty allowed_models / zero cap → no-op for them.
+    which means unrestricted allowed_models / zero cap -> no-op for them.
    """
    try:
        user = get_current_user(request)
@@ -88,8 +88,18 @@ def _enforce_chat_privileges(request, sess) -> None:
        return

    privs = auth_manager.get_privileges(user) or {}
-    allowed = privs.get("allowed_models") or []
-    if allowed and sess.model and sess.model not in allowed:
+
+    # Explicit "block everything" sentinel takes precedence over the
+    # allowlist — it's the only way to distinguish "user clicked [None]"
+    # (block all) from "user clicked [All]" (no restriction), since both
+    # otherwise produce an empty `allowed_models` list.
+    if privs.get("block_all_models"):
+        raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.")
+
+    allowed_raw = privs.get("allowed_models")
+    allowed = allowed_raw if isinstance(allowed_raw, list) else []
+    restricted = bool(privs.get("allowed_models_restricted")) or bool(allowed)
+    if restricted and sess.model and sess.model not in allowed:
        raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.")

    cap = int(privs.get("max_messages_per_day") or 0)
@@ -149,9 +159,17 @@ async def auto_name_session(session_manager, sess):
            return

        owner = getattr(sess, "owner", None)
-        t_url, t_model, t_headers = resolve_task_endpoint(
-            sess.endpoint_url, sess.model, sess.headers, owner=owner,
-        )
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
+        if not t_model:
+            # If no task/utility model is configured at all, fall back to
+            # the session's own model so auto-naming still works even on
+            # minimal setups.
+            from src.endpoint_resolver import resolve_endpoint
+            _fallback = resolve_endpoint("default", owner=owner)
+            if _fallback and _fallback[1]:
+                t_url, t_model, t_headers = _fallback
+            else:
+                t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
        if not t_model:
            logger.debug("[auto-name] No model provided, skipping")
            return
@@ -194,14 +212,26 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None:
    Returns {"model": ..., "endpoint_url": ..., "endpoint_name": ...} or None.
    """
    import requests as _req
-    from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base
+    from src.endpoint_resolver import (
+        build_chat_url,
+        build_headers,
+        build_models_url,
+        normalize_base,
+        resolve_endpoint_runtime,
+    )
+    from src.chatgpt_subscription import is_chatgpt_subscription_base

    current_url = sess.endpoint_url or ""
+    owner = getattr(sess, "owner", None)
    db = SessionLocal()
    try:
-        endpoints = db.query(ModelEndpoint).filter(
+        q = db.query(ModelEndpoint).filter(
            ModelEndpoint.is_enabled == True
-        ).all()
+        )
+        if owner:
+            from src.auth_helpers import owner_filter
+            q = owner_filter(q, ModelEndpoint, owner)
+        endpoints = q.all()
    finally:
        db.close()

@@ -210,26 +240,33 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None:
        # Skip current endpoint
        if current_url and base in current_url:
            continue
-        # Quick ping
-        ping_url = build_models_url(base)
-        headers = build_headers(ep.api_key, base)
        try:
-            r = _req.get(ping_url, headers=headers, timeout=5)
-            r.raise_for_status()
-            data = r.json()
-            models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
-            if not models:
-                models = [
-                    m.get("name") or m.get("model")
-                    for m in (data.get("models") or [])
-                    if m.get("name") or m.get("model")
-                ]
+            base, api_key = resolve_endpoint_runtime(ep, owner=owner)
+        except Exception:
+            continue
+        ping_url = build_models_url(base)
+        headers = build_headers(api_key, base)
+        try:
+            if ping_url:
+                r = _req.get(ping_url, headers=headers, timeout=5)
+                r.raise_for_status()
+                data = r.json()
+                models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
+                if not models:
+                    models = [
+                        m.get("name") or m.get("model")
+                        for m in (data.get("models") or [])
+                        if m.get("name") or m.get("model")
+                    ]
+            else:
+                models = json.loads(ep.cached_models or "[]")
            if not models:
                continue
            # Found a working endpoint — update session
            new_model = models[0]
            chat_url = build_chat_url(base)
-            new_headers = build_headers(ep.api_key, base)
+            new_headers = build_headers(api_key, base)
+            persisted_headers = {} if is_chatgpt_subscription_base(base) else new_headers

            sess.model = new_model
            sess.endpoint_url = chat_url
@@ -241,7 +278,7 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None:
                _db.query(DBSession).filter(DBSession.id == session_id).update({
                    "model": new_model,
                    "endpoint_url": chat_url,
-                    "headers": json.dumps(new_headers),
+                    "headers": persisted_headers,
                })
                _db.commit()
            finally:
@@ -275,11 +312,16 @@ def extract_preset(chat_handler, preset_id) -> PresetInfo:
 async def preprocess(
    chat_handler, message, att_ids, sess,
    auto_opened_docs: Optional[list] = None,
+    allow_tool_preprocessing: bool = True,
 ) -> PreprocessedMessage:
    """Run chat_handler.preprocess_message and wrap the result."""
    enhanced, user_content, text_ctx, yt_transcripts, att_meta = (
        await chat_handler.preprocess_message(
-            message, att_ids, sess, auto_opened_docs=auto_opened_docs
+            message,
+            att_ids,
+            sess,
+            auto_opened_docs=auto_opened_docs,
+            allow_tool_preprocessing=allow_tool_preprocessing,
        )
    )
    return PreprocessedMessage(
@@ -329,16 +371,26 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
        return False


+def _has_auth_keys(headers) -> bool:
+    """Report whether ``headers`` is a dict holding an Authorization or x-api-key key (any case)."""
+    if not isinstance(headers, dict):
+        return False
+    return any(name.lower() in ('authorization', 'x-api-key') for name in headers)
+
+
 def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None):
    """Ensure session has auth headers — resolve from endpoint DB if missing."""
-    has_auth = sess.headers and isinstance(sess.headers, dict) and any(
-        k.lower() in ('authorization', 'x-api-key') for k in sess.headers
-    )
-    if has_auth:
+    try:
+        from src.chatgpt_subscription import is_chatgpt_subscription_base
+        is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(sess, "endpoint_url", "") or "")
+    except Exception:
+        is_chatgpt_subscription = False
+    has_auth = _has_auth_keys(sess.headers)
+    if has_auth and not is_chatgpt_subscription:
        return

    try:
-        from src.endpoint_resolver import build_headers, normalize_base
+        from src.endpoint_resolver import build_headers, resolve_endpoint_runtime
        db = SessionLocal()
        try:
            target_url = getattr(sess, "endpoint_url", "") or ""
@@ -354,10 +406,30 @@ def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None):
            for ep in q.all():
                if not _session_url_matches_endpoint(target_url, ep.base_url or ""):
                    continue
-                if not ep.api_key:
+                try:
+                    base, api_key = resolve_endpoint_runtime(ep, owner=owner)
+                except Exception as e:
+                    logger.warning("Failed to resolve provider auth for session %s: %s", session_id, e)
+                    return
+                if not api_key:
+                    # No usable key (e.g. ChatGPT Subscription needs re-auth).
+                    return
+                sess.headers = build_headers(api_key, base)
+                if is_chatgpt_subscription:
+                    # The bearer is short-lived and re-resolved per request, so it
+                    # stays request-local and is never written to the plaintext
+                    # sessions.headers column. Proactively strip any bearer an
+                    # older code path may have persisted so it does not linger.
+                    stale_q = db.query(DBSession).filter(DBSession.id == session_id)
+                    if owner:
+                        stale_q = stale_q.filter(DBSession.owner == owner)
+                    stored = stale_q.first()
+                    if stored is not None and _has_auth_keys(stored.headers):
+                        stale_q.update({"headers": {}})
+                        db.commit()
+                        logger.info(f"Cleared persisted ChatGPT Subscription bearer from session {session_id}")
+                    logger.debug(f"Resolved request-local ChatGPT Subscription auth for session {session_id}")
                    return
-                base = normalize_base(ep.base_url or "")
-                sess.headers = build_headers(ep.api_key, base)
                update_q = db.query(DBSession).filter(DBSession.id == session_id)
                if owner:
                    update_q = update_q.filter(DBSession.owner == owner)
@@ -401,7 +473,12 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:

    db = SessionLocal()
    try:
-        endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all()
+        q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+        owner = getattr(sess, "owner", None)
+        if owner:
+            from src.auth_helpers import owner_filter
+            q = owner_filter(q, ModelEndpoint, owner)
+        endpoints = q.all()
        for ep in endpoints:
            try:
                if normalize_base(getattr(ep, "base_url", "") or "") != session_base:
@@ -428,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
    return None


+def _session_is_research_spinoff(sess) -> bool:
+    """Report whether ``sess`` carries a research "Discuss" spin-off primer.
+
+    Scans ``sess.history`` for a system-role message whose metadata has a
+    truthy ``research_spinoff_from`` entry. ``build_chat_context`` uses the
+    result to switch off memory and RAG injection for such sessions. Each
+    message may be an object exposing ``role``/``metadata`` attributes or a
+    plain dict with those keys.
+    """
+    def _field(msg, key):
+        # Attribute first; dicts fall back to key lookup when it is None.
+        value = getattr(msg, key, None)
+        if value is None and isinstance(msg, dict):
+            value = msg.get(key)
+        return value
+
+    return any(
+        (_field(msg, "metadata") or {}).get("research_spinoff_from")
+        for msg in (getattr(sess, "history", []) or [])
+        if _field(msg, "role") == "system"
+    )
+
+
 async def build_chat_context(
    sess,
    request,
@@ -448,6 +548,7 @@ async def build_chat_context(
    webhook_manager=None,
    use_enhanced_message: bool = False,
    agent_mode: bool = False,
+    allow_tool_preprocessing: bool = True,
 ) -> ChatContext:
    """Build the full context (preface + messages) for an LLM call.

@@ -465,6 +566,7 @@ async def build_chat_context(
    preprocessed = await preprocess(
        chat_handler, message, att_ids or [], sess,
        auto_opened_docs=auto_opened_docs,
+        allow_tool_preprocessing=allow_tool_preprocessing,
    )

    # Add user message to history
@@ -483,18 +585,29 @@ async def build_chat_context(
    # Skills injection respects its own enable toggle (mirrors memory_enabled).
    # When off, the "Available skills" index is not added to the prompt.
    skills_enabled = not incognito and uprefs.get("skills_enabled", True)
+    if not allow_tool_preprocessing:
+        mem_enabled = False
+        skills_enabled = False
    logger.debug(
        "Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)",
        mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
    )

+    # Research-spinoff ("Discuss") sessions are grounded on the seeded report:
+    # the primer system message IS the knowledge base. Injecting global memory
+    # or personal-doc RAG on every turn pulls in keyword-matched but off-topic
+    # facts ("wrong data") and competes with the report, so suppress both here.
+    is_research_spinoff = _session_is_research_spinoff(sess)
+    if is_research_spinoff:
+        mem_enabled = False
+
    # Use RAG?
    use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
-    if incognito:
+    if incognito or not allow_tool_preprocessing or is_research_spinoff:
        use_rag_val = False

    # If pre-fetched search context was provided (compare mode), skip live web search
-    skip_web = bool(search_context)
+    skip_web = bool(search_context) or not allow_tool_preprocessing

    # Build context preface
    # The stream path uses enhanced_message (with CoT/preprocessing applied),
@@ -513,7 +626,7 @@ async def build_chat_context(
        incognito=incognito,
        use_skills=skills_enabled,
    )
-    if use_rag is not None:
+    if use_rag is not None or is_research_spinoff:
        _preface_kwargs["use_rag"] = use_rag_val
    preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)

@@ -521,7 +634,7 @@ async def build_chat_context(
    used_memories = getattr(chat_processor, '_last_used_memories', [])

    # Inject pre-fetched search context (compare mode)
-    if search_context:
+    if search_context and allow_tool_preprocessing:
        preface.append(untrusted_context_message("prefetched search context", search_context))

    # YouTube transcripts
@@ -530,16 +643,40 @@ async def build_chat_context(

    # Normalize model ID. Prefer cached endpoint models so group chat does not
    # re-hit slow local /models endpoints on every participant turn.
-    norm = _normalize_model_id_from_cache(sess) or normalize_model_id(sess.endpoint_url, sess.model)
+    norm = _normalize_model_id_from_cache(sess) or normalize_model_id(
+        sess.endpoint_url,
+        sess.model,
+        owner=getattr(sess, "owner", None),
+    )
    if norm:
        sess.model = norm

    # Build messages
    messages = preface + sess.get_context_messages()

+    # Current date/time — injected as a standalone *user*-role context message
+    # placed immediately before the latest user turn, NOT folded into the
+    # system prompt. Its text changes every minute, and local OpenAI-compatible
+    # backends (llama.cpp / LM Studio) key their KV-cache prefix off the
+    # system message byte-for-byte; mixing ever-changing timestamp text into
+    # it would invalidate the cached prefix on every request (issue #2927).
+    # Placing it at the tail also keeps it out of the stable
+    # preface+history prefix, so that prefix stays byte-identical turn over
+    # turn (modulo the genuinely new history entries) and the cache survives.
+    if not agent_mode:
+        try:
+            from src.user_time import current_datetime_context_message
+            _dt_msg = current_datetime_context_message()
+            if messages and messages[-1].get("role") == "user":
+                messages.insert(len(messages) - 1, _dt_msg)
+            else:
+                messages.append(_dt_msg)
+        except Exception:
+            logger.debug("Failed to add current date/time context", exc_info=True)
+
    # Auto-compact
    messages, context_length, was_compacted = await maybe_compact(
-        sess, sess.endpoint_url, sess.model, messages, sess.headers,
+        sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user,
    )
    messages = trim_for_context(messages, context_length)

@@ -772,7 +909,19 @@ def save_assistant_response(
 ):
    """Add assistant response to session history. In incognito mode, keeps in-memory context but skips DB persistence."""
    md = dict(last_metrics) if last_metrics else {}
-    md["model"] = sess.model
+    def _model_value(value) -> str:
+        if value is None:
+            return ""
+        if not isinstance(value, str):
+            value = str(value)
+        return value.strip()
+
+    requested_model = _model_value(md.get("requested_model") or md.get("selected_model") or getattr(sess, "model", ""))
+    actual_model = _model_value(md.get("model") or md.get("actual_model") or requested_model)
+    if requested_model:
+        md["requested_model"] = requested_model
+    if actual_model:
+        md["model"] = actual_model
    if character_name:
        md["character_name"] = character_name
    if web_sources:
@@ -821,6 +970,54 @@ def save_assistant_response(
    return None


+def _is_session_stream_active(session_id: str) -> bool:
+    """Best-effort probe: is ``session_id`` in chat_routes' ``_active_streams``?
+
+    Used to keep background extraction off a live completion (issue #2927).
+    Imported lazily (chat_routes imports this module); errors yield False."""
+    try:
+        from routes import chat_routes as _routes
+        live = getattr(_routes, "_active_streams", {})
+        return session_id in live
+    except Exception:
+        return False
+
+
+async def _run_extraction_jobs_sequentially(session_id: str, jobs: list, max_wait_s: float = 120.0):
+    """Await queued background-extraction jobs one at a time, each only once
+    no chat completion is streaming for this session.
+
+    As diagnosed in issue #2927, firing memory/skill extraction concurrently
+    with the main chat completion (or with each other) makes them compete for
+    the local backend's limited processing slots. Waiting for the stream to
+    go idle and then running the jobs strictly in sequence keeps at most one
+    "side" request in flight at any time.
+
+    Args:
+        session_id: Session whose live-stream state gates every job.
+        jobs: ``(name, awaitable)`` pairs, awaited in the given order.
+        max_wait_s: Upper bound, per wait, on polling for the stream to go
+            idle; once it is exceeded the job runs anyway.
+
+    A job that raises ``Exception`` is logged and skipped. If this runner is
+    cancelled, jobs that never started are closed rather than left unawaited.
+    """
+    poll = 0.25
+
+    async def _wait_until_idle() -> None:
+        # Poll the live-stream registry until idle or the per-wait budget is spent.
+        waited = 0.0
+        while _is_session_stream_active(session_id) and waited < max_wait_s:
+            await asyncio.sleep(poll)
+            waited += poll
+
+    queue = list(jobs)
+    started = 0
+    try:
+        # Let the triggering turn's own stream finish winding down first (a
+        # small safety margin, not the primary mechanism).
+        await _wait_until_idle()
+        for name, job in queue:
+            # Re-check before each job: a fast follow-up message may have
+            # started a new stream for this session in the meantime.
+            await _wait_until_idle()
+            started += 1
+            try:
+                await job
+            except Exception:
+                logger.warning("[bg-extract] %s extraction job failed for session %s", name, session_id, exc_info=True)
+    finally:
+        # Cancellation is not an ``Exception``, so it escapes the handler
+        # above and can leave later jobs unstarted. Close those coroutine
+        # objects so they are not dropped unawaited; awaitables without a
+        # ``close`` method are left alone.
+        for _name, leftover in queue[started:]:
+            close = getattr(leftover, "close", None)
+            if callable(close):
+                close()
+
+
 def run_post_response_tasks(
    sess,
    session_manager,
@@ -841,21 +1038,37 @@ def run_post_response_tasks(
    skills_manager=None,
    owner: str = None,
    extract_skills: bool = True,
+    allow_background_extraction: bool = True,
 ):
-    """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction."""
+    """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.
+
+    Memory/skill extraction are queued to run *sequentially*, after the main
+    completion stream for this session has fully wound down — never
+    concurrently with it or with each other. As diagnosed in issue #2927,
+    firing these "side" LLM calls in parallel with the main chat completion
+    makes them compete for the local backend's limited processing slots
+    (llama.cpp defaults to 4), evicting the main conversation's cached
+    checkpoint and forcing a full prompt re-evaluation on the next turn. By
+    the time this function runs the main response is already saved, but the
+    extraction calls themselves are still async — queuing them through
+    ``_queue_background_extraction`` keeps them from overlapping the *next*
+    turn's request too.
+    """
+    _extraction_jobs: list = []
+
    # Memory extraction — only every 4th message pair to avoid excess LLM calls
    _msg_count = len(sess.history) if hasattr(sess, 'history') else 0
    _should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0)
-    if not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True):
+    if allow_background_extraction and not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True):
        from services.memory.memory_extractor import extract_and_store
        from src.task_endpoint import resolve_task_endpoint
        t_url, t_model, t_headers = resolve_task_endpoint(
            sess.endpoint_url, sess.model, sess.headers, owner=owner,
        )
-        asyncio.create_task(extract_and_store(
+        _extraction_jobs.append(("memory", extract_and_store(
            sess, memory_manager, memory_vector,
            t_url, t_model, t_headers,
-        ))
+        )))

    # Skill extraction from complex agent runs. Only when the user actually
    # chose agent mode — not a chat we auto-escalated for a notes/calendar
@@ -873,6 +1086,7 @@ def run_post_response_tasks(
    )
    if (
        extract_skills
+        and allow_background_extraction
        and auto_skills_enabled
        and not incognito
        and not compare_mode
@@ -890,12 +1104,15 @@ def run_post_response_tasks(
                sess.endpoint_url, sess.model, sess.headers, owner=owner,
            )
            logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
-            asyncio.create_task(maybe_extract_skill(
+            _extraction_jobs.append(("skill", maybe_extract_skill(
                sess, skills_manager,
                s_url, s_model, s_headers,
                agent_rounds, agent_tool_calls,
                owner=owner,
-            ))
+            )))
+
+    if _extraction_jobs:
+        asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))

    # Token accumulation
    if last_metrics:
@@ -6,7 +6,7 @@ import os
 import time
 import logging
 from datetime import datetime
-from typing import Dict, Any, AsyncGenerator, List
+from typing import Dict, Any, AsyncGenerator, List, Optional

 from fastapi import APIRouter, Request, HTTPException, Form, Query
 from fastapi.responses import StreamingResponse
@@ -20,6 +20,7 @@ from src import agent_runs
 from src.model_context import estimate_tokens
 from src.chat_helpers import coerce_message_and_session
 from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url
+from src.session_search import search_session_messages
 from src.prompt_security import untrusted_context_message
 from core.exceptions import SessionNotFoundError
 from src.auth_helpers import get_current_user
@@ -39,6 +40,7 @@ from routes.chat_helpers import (
    _enforce_chat_privileges,
 )
 from src.action_intents import classify_tool_intent as _classify_tool_intent
+from src.tool_policy import build_effective_tool_policy

 logger = logging.getLogger(__name__)

@@ -60,6 +62,33 @@ def _stream_set(session_id: str, **fields) -> None:
    rec.update(fields)


+def _resolve_request_workspace(request, raw_value) -> tuple:
+    """Resolve the posted workspace for this request: (workspace, rejected).
+
+    Privilege is checked BEFORE the path ever touches the filesystem. Only
+    admin/single-user callers can use the workspace-backed file/shell tools,
+    so only they get vet_workspace() and the workspace_rejected signal. For
+    any other caller the submitted value is dropped uniformly, with no vetting
+    and no event: otherwise the presence/absence of workspace_rejected would
+    let a non-admin chat caller probe which host paths exist.
+
+    vet_workspace rejects non-directories, sensitive roots (.ssh, .gnupg,
+    ...), and filesystem roots; on rejection there is no confinement and the
+    default tool-path allowlist applies. The rejected value is surfaced so the
+    stream can tell an admin client (which believes a workspace is active)
+    that it was dropped.
+    """
+    requested = (raw_value or "").strip()  # None and "" both normalize to ""
+    if not requested:
+        return "", ""  # nothing requested, so nothing to reject
+    from src.tool_security import owner_is_admin_or_single_user
+    if not owner_is_admin_or_single_user(get_current_user(request)):
+        return "", ""  # dropped silently; the path is never vetted
+    from src.tool_execution import vet_workspace
+    workspace = vet_workspace(requested) or ""  # falsy result means rejected
+    return workspace, (requested if not workspace else "")  # raw value only on rejection
+
+
 def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
    if not session_url or not endpoint_base:
        return False
@@ -97,7 +126,8 @@ def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool:
        sess.model = ""
        sess.headers = {}
        return True
-    except Exception:
+    except Exception as e:
+        logger.warning("Failed to clear orphaned session endpoint", exc_info=e)
        db.rollback()
        return False
    finally:
@@ -115,7 +145,8 @@ def _endpoint_cache_contains_model(endpoint, model: str) -> bool:
        return True
    try:
        models = json.loads(raw) if isinstance(raw, str) else raw
-    except Exception:
+    except Exception as e:
+        logger.warning("Failed to parse cached models list, treating as containing model", exc_info=e)
        return True
    if not isinstance(models, list) or not models:
        return True
@@ -167,13 +198,20 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
    Covers the window between endpoint setup and the first chat send: the
    picker showed a model in the dropdown but the session record never got
    written (Issue #587 — UI uses the cached endpoint list, not s.model).
-    Without this, we'd POST the upstream with model="" and get a generic
-    401/503 instead of using the model the user already picked.
-
-    Returns True iff sess.model was repaired.
+    For ChatGPT Subscription, also repairs stale OpenAI API model names such as
+    ``gpt-5`` that are not accepted by the Codex-backed ChatGPT account route.
    """
-    if getattr(sess, "model", None):
-        return False
+    current_model = (getattr(sess, "model", "") or "").strip()
+    endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip()
+    is_chatgpt_subscription = False
+    if current_model:
+        try:
+            from src.chatgpt_subscription import is_chatgpt_subscription_base
+            is_chatgpt_subscription = is_chatgpt_subscription_base(endpoint_url)
+            if not is_chatgpt_subscription:
+                return False
+        except Exception:
+            return False
    db = SessionLocal()
    try:
        # Prefer the endpoint whose base URL matches the session — we know the
@@ -192,16 +230,52 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
                    break
        if not ep:
            return False
+        if not is_chatgpt_subscription:
+            try:
+                from src.chatgpt_subscription import is_chatgpt_subscription_base
+                is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(ep, "base_url", "") or endpoint_url)
+            except Exception:
+                is_chatgpt_subscription = False
        try:
            cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or [])
-        except Exception:
+        except Exception as e:
+            logger.warning("Failed to parse cached_models for endpoint %r", getattr(ep, "id", "?"), exc_info=e)
            cached = []
        if not cached:
+            visible = []
+        else:
+            try:
+                visible = _visible_models(cached, getattr(ep, "hidden_models", None))
+            except Exception:
+                visible = cached
+        if current_model and current_model in {str(item).strip() for item in visible}:
            return False
-        try:
-            visible = _visible_models(cached, getattr(ep, "hidden_models", None))
-        except Exception:
-            visible = cached
+        if is_chatgpt_subscription:
+            live_models = []
+            if getattr(ep, "provider_auth_id", None):
+                try:
+                    from src.chatgpt_subscription import fetch_available_models
+                    from src.endpoint_resolver import resolve_endpoint_runtime
+                    _base, api_key = resolve_endpoint_runtime(ep, owner=owner)
+                    if api_key:
+                        live_models = fetch_available_models(api_key)
+                        if live_models:
+                            ep.cached_models = json.dumps(live_models)
+                            db.commit()
+                except Exception:
+                    live_models = []
+            # ChatGPT Subscription recovery must use the live Codex catalog.
+            # Cached rows are only trusted above to avoid revalidating a model
+            # that is already present in the visible picker list.
+            cached = live_models
+            if not cached:
+                return False
+            try:
+                visible = _visible_models(cached, getattr(ep, "hidden_models", None))
+            except Exception:
+                visible = cached
+            if current_model and current_model in {str(item).strip() for item in visible}:
+                return False
        if not visible:
            return False
        model = visible[0]
@@ -211,14 +285,17 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
        # Persist so the next request, websocket reconnect, or page reload
        # picks up the same model (we'd otherwise re-pick on every send
        # and silently switch on the user if the cached order shifts).
-        db_session = db.query(DBSession).filter(DBSession.id == session_id).first()
+        db_session_q = db.query(DBSession).filter(DBSession.id == session_id)
+        if owner:
+            db_session_q = db_session_q.filter(DBSession.owner == owner)
+        db_session = db_session_q.first()
        if db_session:
            db_session.model = model
            db_session.updated_at = datetime.utcnow()
            db.commit()
        sess.model = model
        logger.info(
-            "Recovered empty session model for %s — picked %r from endpoint %s",
+            "Recovered session model for %s — picked %r from endpoint %s",
            session_id, model, ep.id,
        )
        return True
@@ -304,8 +381,13 @@ def setup_chat_routes(
        # non-streaming path can't be used to bypass).
        _enforce_chat_privileges(request, sess)

+        tool_policy = build_effective_tool_policy(last_user_message=message)
+        allow_tool_preprocessing = not tool_policy.block_all_tool_calls
+
        # Inline memory command
-        memory_response = await chat_handler.handle_memory_command(sess, message)
+        memory_response = None
+        if not tool_policy.blocks("manage_memory"):
+            memory_response = await chat_handler.handle_memory_command(sess, message)
        if memory_response:
            return {"response": memory_response}

@@ -319,10 +401,15 @@ def setup_chat_routes(
            use_web=use_web,
            time_filter=time_filter,
            webhook_manager=webhook_manager,
+            allow_tool_preprocessing=allow_tool_preprocessing,
        )

        # Research injection
-        if use_research:
+        research_blocked_by_policy = (
+            tool_policy.blocks("trigger_research")
+            or tool_policy.blocks("manage_research")
+        )
+        if use_research and not research_blocked_by_policy:
            try:
                _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess)
                research_ctx = await research_handler.call_research_service(
@@ -343,6 +430,7 @@ def setup_chat_routes(
            temperature=ctx.preset.temperature,
            max_tokens=ctx.preset.max_tokens,
            prompt_type=preset_id,
+            session_id=session,
        )
        _clean_reply, _clean_md = clean_thinking_for_save(reply, {"model": sess.model})
        sess.add_message(ChatMessage("assistant", _clean_reply, metadata=_clean_md))
@@ -357,6 +445,7 @@ def setup_chat_routes(
            ctx.uprefs, memory_manager, memory_vector, webhook_manager,
            character_name=ctx.preset.character_name,
            owner=ctx.user,
+            allow_background_extraction=not tool_policy.block_all_tool_calls,
        )

        return {"response": reply}
@@ -388,19 +477,34 @@ def setup_chat_routes(
        use_research = form_data.get("use_research")
        time_filter = form_data.get("time_filter")
        preset_id = form_data.get("preset_id")
-        allow_bash = form_data.get("allow_bash")
-        allow_web_search = form_data.get("allow_web_search")
+        # Issue #3229: API callers send JSON, not FormData.  Read from the
+        # JSON body as fallback so callers who send {"allow_bash": true}
+        # actually get bash enabled.
+        allow_bash = form_data.get("allow_bash") or (body or {}).get("allow_bash")
+        allow_web_search = form_data.get("allow_web_search") or (body or {}).get("allow_web_search")
        use_rag = form_data.get("use_rag")
        search_context = form_data.get("search_context")  # pre-fetched web search results (compare mode)
        compare_mode = str(form_data.get("compare_mode", "")).lower() == "true"
        incognito = str(form_data.get("incognito", "")).lower() == "true"
+        # Plan mode is not part of the merge-ready UI. Ignore stale clients or
+        # manual form posts that still send plan_mode=true.
+        plan_mode = False
        chat_mode = str(form_data.get("mode", "")).lower()  # 'chat' or 'agent'
-        # Workspace: confine the agent's file/shell tools to this folder. Validate
-        # it's a real directory; ignore (no confinement) otherwise.
-        workspace = (form_data.get("workspace") or "").strip()
-        if workspace:
-            _ws_real = os.path.realpath(os.path.expanduser(workspace))
-            workspace = _ws_real if os.path.isdir(_ws_real) else ""
+        # Workspace: confine the agent's file/shell tools to this folder.
+        workspace, workspace_rejected = _resolve_request_workspace(
+            request, form_data.get("workspace")
+        )
+        # Plan mode is a modifier on agent mode — it only makes sense with tools.
+        if plan_mode:
+            chat_mode = "agent"
+        # An approved plan being EXECUTED: the frontend sends the checklist back
+        # on each turn so we can pin it in context. This way a long plan on a
+        # weak model survives history truncation — the agent can always re-read
+        # the plan. Ignored while still proposing (plan_mode on). Capped so a
+        # huge plan can't blow the prompt.
+        approved_plan = ""
+        if not plan_mode:
+            approved_plan = (form_data.get("approved_plan") or "").strip()[:8192]
        # Did the USER explicitly pick agent mode? (vs. us auto-escalating
        # below). Skill extraction should only learn from real agent sessions,
        # not chats we quietly promoted for a notes/calendar intent.
@@ -425,6 +529,66 @@ def setup_chat_routes(
        active_doc_id = form_data.get("active_doc_id", "").strip()
        logger.info(f"[doc-inject] chat_mode={chat_mode}, active_doc_id={active_doc_id!r}")

+        # Active email reader — when the user has an email open in the UI, the
+        # frontend passes its uid/folder/account so "reply", "summarize this",
+        # etc. resolve to the real email instead of the agent inventing a
+        # fake markdown draft.
+        active_email_uid = form_data.get("active_email_uid", "").strip()
+        active_email_folder = form_data.get("active_email_folder", "INBOX").strip() or "INBOX"
+        active_email_account = form_data.get("active_email_account", "").strip()
+        active_email_ctx: Optional[Dict[str, str]] = None
+        # Always reset between requests so a stale active-email pointer from
+        # a previous turn (different reader closed, different account, etc.)
+        # can't leak in when the user has no email open this turn.
+        try:
+            from src.tool_implementations import clear_active_email
+            clear_active_email()
+        except Exception:
+            pass
+        if active_email_uid:
+            active_email_ctx = {
+                "uid": active_email_uid,
+                "folder": active_email_folder,
+                "account": active_email_account,
+            }
+            # Try to enrich with subject + from so the agent's system prompt
+            # block can quote them. Best-effort: a stale cache is fine, a
+            # missing email just means we pass uid/folder/account only.
+            try:
+                from routes.email_routes import _read_cache_get, _read_cache_key
+                _ck = _read_cache_key(active_email_account or None, active_email_folder, active_email_uid, owner=get_current_user(request))
+                _cached_email = _read_cache_get(_ck)
+                if _cached_email and isinstance(_cached_email, dict):
+                    active_email_ctx["subject"] = str(_cached_email.get("subject") or "")
+                    active_email_ctx["from"] = str(
+                        _cached_email.get("from_address")
+                        or _cached_email.get("from")
+                        or _cached_email.get("from_name")
+                        or ""
+                    )
+                    _body_preview = (_cached_email.get("body") or "")[:2000]
+                    if _body_preview:
+                        active_email_ctx["body_preview"] = _body_preview
+            except Exception as _e:
+                logger.debug(f"[email-inject] cache enrich skipped: {_e}")
+            # Stash so email tools can resolve "this email" without UID guessing.
+            try:
+                from src.tool_implementations import set_active_email
+                set_active_email(
+                    uid=active_email_uid,
+                    folder=active_email_folder,
+                    account=active_email_account or None,
+                    subject=active_email_ctx.get("subject"),
+                    sender=active_email_ctx.get("from"),
+                )
+            except Exception as _e:
+                logger.debug(f"[email-inject] set_active_email failed: {_e}")
+            logger.info(
+                "[email-inject] active_email uid=%s folder=%s account=%s subject=%r",
+                active_email_uid, active_email_folder, active_email_account or "(default)",
+                active_email_ctx.get("subject", ""),
+            )
+
        try:
            # Attachment-only sends: skip the message-required check when the
            # user has attached one or more files (the attachment IS the action).
@@ -479,21 +643,20 @@ def setup_chat_routes(
                do_research = True
                logger.info(f"Session {session} in research_pending — auto-triggering research")

-        # Persist session mode (research > agent > chat)
-        _effective_mode = 'research' if do_research else (chat_mode or 'chat')
-        if _effective_mode in ('agent', 'research', 'chat'):
-            set_session_mode(session, _effective_mode)
-
        att_ids = []
        if body and isinstance(body.get("attachments"), list):
            att_ids = [str(x) for x in body["attachments"]]
        elif attachments:
            try:
                att_ids = [str(x) for x in json.loads(attachments)]
-            except Exception:
-                pass
+            except Exception as e:
+                logger.warning("Failed to parse attachments JSON, ignoring attachments", exc_info=e)

        no_memory = str(form_data.get("no_memory", "")).lower() == "true"
+        pre_context_tool_policy = build_effective_tool_policy(
+            last_user_message=message,
+        )
+        allow_tool_preprocessing = not pre_context_tool_policy.block_all_tool_calls

        # Build shared context (stream path uses enhanced_message for context preface)
        ctx = await build_chat_context(
@@ -515,6 +678,7 @@ def setup_chat_routes(
            # manage_skills (agent mode). In plain chat or incognito the
            # index would be useless / unwanted noise.
            agent_mode=(chat_mode == "agent"),
+            allow_tool_preprocessing=allow_tool_preprocessing,
        )

        _research_flags = {"do": do_research}  # Mutable container for generator scope
@@ -540,15 +704,27 @@ def setup_chat_routes(
                            active_doc_id,
                        )
                        active_doc = None
-                    elif doc_session and doc_session != session:
-                        logger.warning(
-                            "[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s",
-                            active_doc_id,
-                            doc_session,
-                            session,
-                        )
-                        active_doc = None
                    else:
+                        # NOTE: previously dropped the doc when doc.session_id
+                        # != current chat session — but that broke the common
+                        # case of "open an email draft from one chat, ask a
+                        # different chat to write into it". The frontend only
+                        # sends active_doc_id for docs currently visible in
+                        # the UI, and we already owner-checked above, so trust
+                        # the explicit signal. We just log the mismatch and
+                        # re-bind the doc to the current session so future
+                        # turns find it via the session-fallback path too.
+                        if doc_session and doc_session != session:
+                            logger.info(
+                                "[doc-inject] cross-session active_doc_id %s (was session %s, now %s) — accepting and rebinding",
+                                active_doc_id, doc_session, session,
+                            )
+                            try:
+                                active_doc.session_id = session
+                                _doc_db.commit()
+                            except Exception as _e:
+                                _doc_db.rollback()
+                                logger.warning(f"[doc-inject] session rebind failed: {_e}")
                        logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
                else:
                    logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
@@ -568,7 +744,7 @@ def setup_chat_routes(
            # leak a doc that belongs to a DIFFERENT session.
            if not active_doc:
                try:
-                    from src.tool_implementations import get_active_document
+                    from src.agent_tools.document_tools import get_active_document
                    _mem_id = get_active_document()
                    if _mem_id:
                        _mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
@@ -589,9 +765,18 @@ def setup_chat_routes(

        # Build disabled-tools set from frontend toggles + user privileges
        disabled_tools = set()
-        if str(allow_bash).lower() != "true":
+        # Only disable bash/web_search when the caller *explicitly* set them
+        # to a falsy value.  When unset (None), defer to per-user privilege
+        # checks below — this lets admins with can_use_bash=True use bash
+        # by default without having to send allow_bash in every request.
+        if allow_bash is not None and str(allow_bash).lower() != "true":
            disabled_tools.add("bash")
-        if str(allow_web_search).lower() != "true":
+        _explicit_web_intent = bool(_tool_intent and _tool_intent.category == "web")
+        if (
+            allow_web_search is not None
+            and str(allow_web_search).lower() != "true"
+            and not _explicit_web_intent
+        ):
            disabled_tools.add("web_search")
            disabled_tools.add("web_fetch")

@@ -604,6 +789,21 @@ def setup_chat_routes(
                "manage_skills",      # skill presets tied to user
            })

+        # Active email reader open → strip the tools that let the agent
+        # "drift" to a new compose: create_document (writes a fake email-
+        # shaped .md file) and send_email (sends fresh to a recipient the
+        # agent invented). With those gone, the only paths left for "write
+        # email saying X" are ui_control open_email_reply (draft) and
+        # reply_to_email (immediate send) — both of which use the open
+        # email's UID. Code-level enforcement instead of relying on a
+        # prompt rule the model can ignore.
+        if active_email_ctx and active_email_ctx.get("uid"):
+            disabled_tools.update({
+                "create_document",
+                "send_email",
+                "mcp__email__send_email",
+            })
+
        # Enforce per-user privileges
        _privs = {}
        _user = ctx.user
@@ -659,6 +859,32 @@ def setup_chat_routes(
            if chat_mode == 'chat':
                disabled_tools.update({"bash", "python", "read_file", "write_file", "web_search", "web_fetch", "search_chats", "manage_tasks"})

+        # Plan mode: investigate read-only, propose a plan, don't mutate. Block
+        # every tool not on the read-only allowlist. (stream_agent_loop enforces
+        # this again + drops MCP, so this is belt-and-suspenders.)
+        if plan_mode:
+            from src.tool_security import plan_mode_disabled_tools
+            disabled_tools.update(plan_mode_disabled_tools())
+
+        tool_policy = build_effective_tool_policy(
+            disabled_tools=disabled_tools,
+            last_user_message=message,
+        )
+        disabled_tools = tool_policy.all_disabled_names()
+        research_blocked_by_policy = bool(
+            tool_policy.blocks("trigger_research")
+            or tool_policy.blocks("manage_research")
+        )
+        effective_do_research = bool(
+            do_research and _research_flags["do"] and not research_blocked_by_policy
+        )
+
+        # Persist session mode after policy/privilege gates so blocked research
+        # turns remain ordinary chat/agent streams and saved messages.
+        _effective_mode = 'research' if effective_do_research else (chat_mode or 'chat')
+        if _effective_mode in ('agent', 'research', 'chat'):
+            set_session_mode(session, _effective_mode)
+
        async def stream_with_save() -> AsyncGenerator[str, None]:
            # _effective_mode is read-only here; closure captures it from
            # the outer scope. (Was `nonlocal` but never reassigned.)
@@ -666,7 +892,14 @@ def setup_chat_routes(
            web_sources = ctx.web_sources

            # Register active stream for partial-save safety net
-            _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": do_research, "mode": _effective_mode}
+            _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode}
+
+            # The client sent a workspace the server refused to bind (deleted
+            # folder, file path, sensitive dir, filesystem root). Tell it up
+            # front so the UI can clear the pill instead of displaying a
+            # confinement that is not actually in effect.
+            if workspace_rejected:
+                yield f"data: {json.dumps({'type': 'workspace_rejected', 'data': {'path': workspace_rejected}})}\n\n"

            if ctx.preprocessed.attachment_meta:
                yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n"
@@ -690,7 +923,7 @@ def setup_chat_routes(
                yield f"data: {json.dumps({'type': 'memories_used', 'data': ctx.used_memories})}\n\n"

            # Run research as a background task (survives page refresh)
-            if do_research and _research_flags["do"]:
+            if effective_do_research:
                _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess)
                _auth_keys = list(_r_headers.keys()) if _r_headers else []
                logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={_r_ep}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}")
@@ -829,7 +1062,7 @@ def setup_chat_routes(
                _fallback_candidates = []

            # Send model name early so the frontend can show it during streaming
-            _model_suffix = "Research" if do_research else None
+            _model_suffix = "Research" if effective_do_research else None
            _model_info = {"type": "model_info", "model": sess.model}
            if _model_suffix:
                _model_info["suffix"] = _model_suffix
@@ -839,6 +1072,12 @@ def setup_chat_routes(

            if _is_image_generation_session(sess, owner=_user):
                from src.settings import get_setting
+                if tool_policy.blocks("generate_image"):
+                    _blocked_msg = tool_policy.reason_for("generate_image")
+                    yield f'data: {json.dumps({"delta": _blocked_msg})}\n\n'
+                    yield "data: [DONE]\n\n"
+                    _active_streams.pop(session, None)
+                    return
                if not get_setting("image_gen_enabled", True):
                    yield f'data: {json.dumps({"delta": "Image generation is disabled by the administrator."})}\n\n'
                    yield "data: [DONE]\n\n"
@@ -873,6 +1112,8 @@ def setup_chat_routes(
            elif chat_mode == "chat":
                _chat_start = time.time()
                _answered_by = None  # set if the selected model failed and a fallback answered
+                _requested_model = sess.model
+                _actual_model = None
                # ── Chat mode: call stream_llm directly, NO tools, NO document access ──
                try:
                    _chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates
@@ -888,6 +1129,7 @@ def setup_chat_routes(
                        max_tokens=ctx.preset.max_tokens,
                        prompt_type=preset_id,
                        tools=None,
+                        session_id=session,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
                            try:
@@ -905,10 +1147,18 @@ def setup_chat_routes(
                                    # Selected model failed; a fallback answered.
                                    # Forward the notice and remember the real model.
                                    _answered_by = data.get("answered_by") or _answered_by
+                                    _actual_model = _actual_model or _answered_by
+                                    data["selected_model"] = data.get("selected_model") or _requested_model
                                    yield chunk
+                                elif data.get("type") == "model_actual":
+                                    _actual_model = data.get("model") or _actual_model
+                                    data["requested_model"] = _requested_model
+                                    yield f'data: {json.dumps(data)}\n\n'
                                elif data.get("type") == "usage":
                                    last_metrics = data.get("data", {})
-                                    last_metrics["model"] = _answered_by or sess.model
+                                    _reported_model = last_metrics.get("model")
+                                    last_metrics["requested_model"] = _requested_model
+                                    last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model
                                    if ctx.context_length and last_metrics.get("input_tokens"):
                                        pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0)
                                        last_metrics["context_percent"] = pct
@@ -945,7 +1195,8 @@ def setup_chat_routes(
                                    "tokens_per_second": _tps,
                                    "context_percent": _ctx_pct,
                                    "context_length": ctx.context_length,
-                                    "model": sess.model,
+                                    "model": _actual_model or _answered_by or _requested_model,
+                                    "requested_model": _requested_model,
                                    "usage_source": "estimated",
                                }
                                yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
@@ -957,7 +1208,7 @@ def setup_chat_routes(
                                    rag_sources=ctx.rag_sources,
                                    research_sources=research_sources,
                                    used_memories=ctx.used_memories,
-                                    do_research=do_research,
+                                    do_research=effective_do_research,
                                    incognito=incognito,
                                )
                                if _saved_id:
@@ -967,14 +1218,22 @@ def setup_chat_routes(
                                    last_metrics, ctx.uprefs, memory_manager, memory_vector, webhook_manager,
                                    incognito=incognito, compare_mode=compare_mode,
                                    character_name=ctx.preset.character_name,
-                                                            owner=_user,
+                                    owner=_user,
+                                    allow_background_extraction=not tool_policy.block_all_tool_calls,
                                )
                            _stream_set(session, status="done")
                            yield chunk
                except (asyncio.CancelledError, GeneratorExit):
                    if full_response:
                        logger.info("Client disconnected mid-stream (chat mode) for session %s, saving partial (%d chars)", session, len(full_response))
-                        _stopped_content, _stopped_md = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model})
+                        _stopped_content, _stopped_md = clean_thinking_for_save(
+                            full_response,
+                            {
+                                "stopped": True,
+                                "model": _actual_model or _answered_by or _requested_model,
+                                "requested_model": _requested_model,
+                            },
+                        )
                        sess.add_message(ChatMessage("assistant", _stopped_content, metadata=_stopped_md))
                        if not incognito:
                            session_manager.save_sessions()
@@ -986,6 +1245,8 @@ def setup_chat_routes(
                _agent_rounds = 0
                _agent_tool_calls = 0
                _answered_by = None  # set if the selected model failed and a fallback answered
+                _requested_model = sess.model
+                _actual_model = None
                try:
                    from src.settings import get_setting
                    from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS
@@ -1010,10 +1271,14 @@ def setup_chat_routes(
                        max_rounds=_max_rounds,
                        context_length=ctx.context_length,
                        active_document=active_doc,
+                        active_email=active_email_ctx,
                        session_id=session,
                        disabled_tools=disabled_tools if disabled_tools else None,
+                        tool_policy=tool_policy,
                        owner=_user,
                        fallbacks=_fallback_candidates,
+                        plan_mode=plan_mode,
+                        approved_plan=approved_plan or None,
                        workspace=workspace or None,
                    ):
                        if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
@@ -1035,6 +1300,8 @@ def setup_chat_routes(
                                    "doc_stream_open", "doc_stream_delta",
                                    "doc_update", "doc_suggestions", "ui_control",
                                    "rounds_exhausted",
+                                    "ask_user",
+                                    "plan_update",
                                ):
                                    if data.get("type") == "agent_step":
                                        _agent_rounds = max(_agent_rounds, data.get("round", 1))
@@ -1047,10 +1314,18 @@ def setup_chat_routes(
                                    # model so metrics reflect it, not the masked
                                    # selected model.
                                    _answered_by = data.get("answered_by") or _answered_by
+                                    _actual_model = _actual_model or _answered_by
+                                    data["selected_model"] = data.get("selected_model") or _requested_model
                                    yield chunk
+                                elif data.get("type") == "model_actual":
+                                    _actual_model = data.get("model") or _actual_model
+                                    data["requested_model"] = _requested_model
+                                    yield f'data: {json.dumps(data)}\n\n'
                                elif data.get("type") == "metrics":
                                    last_metrics = data.get("data", {})
-                                    last_metrics["model"] = _answered_by or sess.model
+                                    _reported_model = last_metrics.get("model")
+                                    last_metrics["requested_model"] = last_metrics.get("requested_model") or _requested_model
+                                    last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model
                                    yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
                            except json.JSONDecodeError:
                                yield chunk
@@ -1078,6 +1353,7 @@ def setup_chat_routes(
                                    skills_manager=skills_manager,
                                    owner=_user,
                                    extract_skills=user_requested_agent,
+                                    allow_background_extraction=not tool_policy.block_all_tool_calls,
                                )
                            _stream_set(session, status="done")
                            yield chunk
@@ -1091,7 +1367,14 @@ def setup_chat_routes(
                    try:
                        if full_response:
                            logger.info("Client disconnected mid-stream for session %s, saving partial response (%d chars)", session, len(full_response))
-                            _stopped_content2, _stopped_md2 = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model})
+                            _stopped_content2, _stopped_md2 = clean_thinking_for_save(
+                                full_response,
+                                {
+                                    "stopped": True,
+                                    "model": _actual_model or _answered_by or _requested_model,
+                                    "requested_model": _requested_model,
+                                },
+                            )
                            sess.add_message(ChatMessage("assistant", _stopped_content2, metadata=_stopped_md2))
                            if not incognito:
                                session_manager.save_sessions()
@@ -1110,11 +1393,29 @@ def setup_chat_routes(
            finally:
                _active_streams.pop(session, None)

-        # Run the stream as a DETACHED background task so it survives the client
-        # closing the tab / navigating away (true terminal-agent behavior). The
-        # SSE response just subscribes (replay buffered output + live); dropping
-        # the SSE only removes a subscriber — the run keeps going and saves the
-        # assistant message on completion regardless. Reconnect via /api/chat/resume.
+        # Compare panes are short-lived, single-shot generations whose sessions
+        # exist only to drive that one pane — there's nothing to "resume" and
+        # the user expects the pane's Stop button (which aborts the fetch,
+        # closing this SSE) to promptly cancel the upstream LLM call. Detaching
+        # them would keep burning upstream tokens/compute after the pane is
+        # stopped or the comparison is abandoned, and would surface a stale
+        # "still streaming" /resume target for a session nobody will revisit.
+        #
+        # So: stream them directly (no agent_runs wrapping). Starlette cancels
+        # the underlying async generator (raising CancelledError/GeneratorExit
+        # inside it) as soon as it notices the client disconnected — which the
+        # mode-specific except blocks above already handle by saving the
+        # partial response exactly once. This stops the upstream call promptly
+        # without waiting on the next streamed chunk.
+        #
+        # Normal chat/agent streams keep the DETACHED behavior below: they
+        # survive the client closing the tab / navigating away. The SSE
+        # response just subscribes (replay buffered output + live); dropping
+        # the SSE only removes a subscriber — the run keeps going and saves the
+        # assistant message on completion regardless. Reconnect via /api/chat/resume.
+        if compare_mode:
+            return StreamingResponse(_safe_stream(), media_type="text/event-stream")
+
        agent_runs.start(session, _safe_stream())
        return StreamingResponse(agent_runs.subscribe(session), media_type="text/event-stream")

@@ -1185,45 +1486,16 @@ def setup_chat_routes(
            return []

        _user = get_current_user(request)
-        query_term = q.strip()
-        db = SessionLocal()
-        try:
-            base_q = (
-                db.query(DBChatMessage, DBSession.name)
-                .join(DBSession, DBChatMessage.session_id == DBSession.id)
-                .filter(
-                    DBSession.archived == False,
-                    DBChatMessage.content.ilike(f"%{query_term}%"),
-                    DBChatMessage.role.in_(["user", "assistant"]),
-                )
+        return [
+            result.to_dict()
+            for result in search_session_messages(
+                q,
+                limit=limit,
+                owner=_user,
+                restrict_owner=_user is not None,
+                include_legacy_owner=False,
            )
-            if _user:
-                base_q = base_q.filter(DBSession.owner == _user)
-            rows = base_q.order_by(DBChatMessage.timestamp.desc()).limit(limit).all()
-
-            results = []
-            for msg, session_name in rows:
-                content = msg.content or ""
-                lower_content = content.lower()
-                idx = lower_content.find(query_term.lower())
-                if idx == -1:
-                    snippet = content[:120]
-                else:
-                    start = max(0, idx - 50)
-                    end = min(len(content), idx + len(query_term) + 50)
-                    snippet = ("..." if start > 0 else "") + content[start:end] + ("..." if end < len(content) else "")
-
-                results.append({
-                    "session_id": msg.session_id,
-                    "session_name": session_name or "Untitled",
-                    "role": msg.role,
-                    "content_snippet": snippet,
-                    "timestamp": msg.timestamp.isoformat() if msg.timestamp else None,
-                })
-
-            return results
-        finally:
-            db.close()
+        ]

    # ------------------------------------------------------------------ #
    # POST /api/rewrite — lightweight rewrite of last AI message (no tools)
@@ -0,0 +1,170 @@
+"""ChatGPT Subscription device-flow setup routes."""
+
+import json
+import logging
+import uuid
+from typing import Dict, Optional
+
+from fastapi import HTTPException, Request
+
+from core.database import ModelEndpoint, ProviderAuthSession, SessionLocal, utcnow_naive
+from routes.device_flow import (
+    DeviceFlowPoll,
+    DeviceFlowStart,
+    PendingDeviceFlowStore,
+    create_device_flow_router,
+)
+from src.auth_helpers import get_current_user
+from src import chatgpt_subscription
+
+logger = logging.getLogger(__name__)
+
+_DEVICE_FLOW_STORE = PendingDeviceFlowStore()
+
+
+def _provision_endpoint(tokens: Dict, owner: Optional[str]) -> Dict:
+    """Persist ChatGPT tokens for ``owner`` and upsert the endpoint that uses them.
+
+    Fetches the model list with the new access token, then creates or updates
+    the owner's ``ProviderAuthSession`` and its ``ModelEndpoint`` in a single
+    commit. Returns ``{"id", "name", "base_url", "models"}`` for the endpoint.
+
+    Raises:
+        ValueError: a token is missing or no models were discovered.
+    """
+    access_token = tokens.get("access_token")
+    refresh_token = tokens.get("refresh_token")
+    if not access_token or not refresh_token:
+        raise ValueError("ChatGPT token response was missing access_token or refresh_token")
+
+    base = chatgpt_subscription.DEFAULT_CHATGPT_SUBSCRIPTION_BASE_URL
+    models = chatgpt_subscription.fetch_available_models(access_token)
+    if not models:
+        raise ValueError("ChatGPT Subscription connected, but no usable Codex models were discovered for this account.")
+    provider = chatgpt_subscription.CHATGPT_SUBSCRIPTION_PROVIDER
+    db = SessionLocal()
+    try:
+        auth = (
+            db.query(ProviderAuthSession)
+            .filter(ProviderAuthSession.provider == provider, ProviderAuthSession.owner == owner)
+            .first()
+        )
+        if auth is None:
+            auth = ProviderAuthSession(
+                id=str(uuid.uuid4())[:8],
+                provider=provider,
+                owner=owner,
+                label="ChatGPT Subscription",
+                base_url=base,
+                auth_mode="chatgpt",
+            )
+            db.add(auth)
+        auth.base_url = base
+        auth.access_token = access_token
+        auth.refresh_token = refresh_token
+        auth.last_refresh = utcnow_naive()
+        auth.auth_mode = "chatgpt"
+
+        ep = (
+            db.query(ModelEndpoint)
+            .filter(ModelEndpoint.base_url == base, ModelEndpoint.provider_auth_id == auth.id)
+            .filter(ModelEndpoint.owner == owner)
+            .first()
+        )
+        if ep is None:
+            ep = ModelEndpoint(
+                id=str(uuid.uuid4())[:8],
+                name="ChatGPT Subscription",
+                base_url=base,
+                model_type="llm",
+                endpoint_kind="api",
+                owner=owner,
+            )
+            db.add(ep)
+        ep.name = "ChatGPT Subscription"
+        ep.base_url = base
+        ep.api_key = None
+        ep.provider_auth_id = auth.id
+        ep.is_enabled = True
+        ep.supports_tools = False
+        ep.model_type = "llm"
+        ep.endpoint_kind = "api"
+        ep.model_refresh_mode = "manual"
+        ep.cached_models = json.dumps(models)
+        db.commit()
+        result = {"id": ep.id, "name": ep.name, "base_url": ep.base_url, "models": models}
+    finally:
+        db.close()
+
+    # Best-effort: the endpoint is already committed, so a failure here must not fail the setup.
+    try:
+        from routes.model_routes import _invalidate_models_cache
+
+        _invalidate_models_cache()
+    except Exception:
+        logger.warning("Could not invalidate the models cache after ChatGPT Subscription setup", exc_info=True)
+    return result
+
+
+def _start_device_flow(request: Request, _form) -> DeviceFlowStart:
+    """Request a device code from ChatGPT and describe the flow to track.
+
+    ``pending`` keeps what polling needs (the two codes plus the current user);
+    ``response`` carries only the user code and the verification URI.
+    """
+    try:
+        data = chatgpt_subscription.request_device_code()
+    except Exception as exc:
+        raise chatgpt_subscription.to_http_exception(exc) from exc  # keep the cause attached
+
+    device_auth_id, user_code = data.get("device_auth_id"), data.get("user_code")
+    if not device_auth_id or not user_code:
+        raise HTTPException(502, "ChatGPT did not return a complete device code")
+    # Default to the issuer's device page when the reply carries no URI.
+    verification_uri = data.get("verification_uri") or f"{chatgpt_subscription.CHATGPT_OAUTH_ISSUER}/codex/device"
+    owner = get_current_user(request) or None  # any falsy user is stored as None
+    return DeviceFlowStart(
+        pending={"device_auth_id": device_auth_id, "user_code": user_code, "owner": owner},
+        response={"user_code": user_code, "verification_uri": verification_uri},
+        # Missing/zero values fall back to 5 and 900 (presumably seconds — confirm).
+        interval=int(data.get("interval") or 5),
+        expires_in=int(data.get("expires_in") or 900),
+    )
+
+
+def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll:
+    """Poll ChatGPT once for ``pending`` and translate the answer into a ``DeviceFlowPoll``."""
+    try:
+        data = chatgpt_subscription.poll_device_auth(pending["device_auth_id"], pending["user_code"])
+    except Exception as exc:
+        logger.debug("ChatGPT device poll failed: %s", exc)
+        return DeviceFlowPoll.pending(str(exc))  # any poll error is reported as still pending
+
+    authorization_code, code_verifier = data.get("authorization_code"), data.get("code_verifier")
+    if authorization_code and code_verifier:
+        try:
+            tokens = chatgpt_subscription.exchange_authorization_code(authorization_code, code_verifier)
+            result = _provision_endpoint(tokens, pending["owner"])
+        except Exception as exc:
+            logger.exception("ChatGPT Subscription endpoint provisioning failed")
+            raise chatgpt_subscription.to_http_exception(exc) from exc  # keep the cause attached
+        return DeviceFlowPoll.authorized(result)
+
+    err = data.get("error") or data.get("status")
+    if err in ("authorization_pending", "pending", None):  # no status at all also means pending
+        return DeviceFlowPoll.pending()
+    if err == "slow_down":
+        return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None)
+    if err in ("expired_token", "access_denied", "denied"):
+        return DeviceFlowPoll.failed(err)
+    return DeviceFlowPoll.pending(err or "unknown")  # unrecognized status is reported as pending
+
+
+def setup_chatgpt_subscription_routes():
+    """Return what ``create_device_flow_router`` builds for the ChatGPT Subscription callbacks."""
+    router_options = {
+        "prefix": "/api/chatgpt-subscription",
+        "tags": ["chatgpt-subscription"],
+        "store": _DEVICE_FLOW_STORE,
+    }
+    return create_device_flow_router(start_flow=_start_device_flow, poll_flow=_poll_device_flow, **router_options)
@@ -15,10 +15,14 @@ from typing import Any
 from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Request
 from fastapi.responses import StreamingResponse

-from src.auth_helpers import require_user
+from src.auth_helpers import require_authenticated_request, require_user
 from src.tool_implementations import do_manage_notes
+from src.constants import COOKBOOK_STATE_FILE
+from routes._validators import validate_remote_host, validate_ssh_port


+COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
+COOKBOOK_LAUNCH_SCOPES = {"cookbook:launch"}
 TODO_READ_SCOPES = {"todos:read", "todos:write"}
 TODO_WRITE_SCOPES = {"todos:write"}
 EMAIL_READ_SCOPES = {"email:read", "email:draft", "email:send"}
@@ -33,13 +37,30 @@ DOCS_WRITE_SCOPES = {"documents:write"}
 WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"}


+def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
+    """Resolve a cookbook task's stored SSH target into ``(host, port_flag)``.
+
+    ``host`` is ``""`` for a local task. ``remoteHost`` / ``sshPort`` come from
+    cookbook_state.json and get interpolated into an ``ssh`` command string, so
+    they are validated the same way the cookbook routes do (a tampered entry
+    is rejected rather than injected). Values are coerced to ``str`` first: a
+    numeric ``sshPort`` in the JSON would otherwise crash on ``.strip()``.
+    """
+    host = validate_remote_host(str(task.get("remoteHost") or "").strip() or None) or ""
+    ssh_port = validate_ssh_port(str(task.get("sshPort") or "").strip() or None) or ""
+    port_flag = f"-p {ssh_port} " if ssh_port and str(ssh_port) != "22" else ""
+    return host, port_flag
+
+
 async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
    """Run an existing route handler with request.state.current_user temporarily
    set to ``owner`` so its internal get_current_user/require_user calls see
    the scope-gated owner (not the "api" pseudo-user the bearer middleware sets).
    Restores the original value when done. Works for sync and async handlers."""
    orig = getattr(request.state, "current_user", None)
+    orig_api_token = getattr(request.state, "api_token", None)
    request.state.current_user = owner
+    request.state.api_token = False
    try:
        result = fn(*args, **kwargs)
        if asyncio.iscoroutine(result):
@@ -47,6 +68,13 @@ async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
        return result
    finally:
        request.state.current_user = orig
+        if orig_api_token is None:
+            try:
+                delattr(request.state, "api_token")
+            except AttributeError:
+                pass
+        else:
+            request.state.api_token = orig_api_token


 def _scope_owner(request: Request, allowed: set[str]) -> str:
@@ -63,6 +91,20 @@ def _scope_owner(request: Request, allowed: set[str]) -> str:
    return require_user(request)


+def _scope_owner_all(request: Request, required: set[str]) -> str:
+    """Resolve the acting owner; an API token must hold every ``required`` scope."""
+    if not getattr(request.state, "api_token", False):
+        return require_user(request)  # no API token on the request: regular user check
+    granted = set(getattr(request.state, "api_token_scopes", []) or [])
+    absent = sorted(required - granted)
+    if absent:
+        raise HTTPException(403, f"API token missing required scope: {' and '.join(absent)}")
+    token_owner = getattr(request.state, "api_token_owner", None)
+    if not token_owner:
+        raise HTTPException(403, "API token has no owner")
+    return token_owner
+
+
 def _find_endpoint(router: APIRouter | None, method: str, path: str):
    if router is None:
        return None
@@ -110,7 +152,7 @@ def setup_codex_routes(
                    "read": scoped(EMAIL_READ_SCOPES),
                    "draft": scoped(EMAIL_DRAFT_SCOPES),
                    "send": scoped(EMAIL_SEND_SCOPES),
-                    "actions": ["list", "read", "draft", "send"],
+                    "actions": ["list", "read", "draft_document", "draft", "send"],
                },
                "memory": {
                    "read": scoped(MEMORY_READ_SCOPES),
@@ -130,6 +172,11 @@ def setup_codex_routes(
                    "actions": ["library", "read", "create", "delete"],
                    "available": documents_library_endpoint is not None,
                },
+                "cookbook": {
+                    "read": scoped(COOKBOOK_READ_SCOPES),
+                    "launch": scoped(COOKBOOK_LAUNCH_SCOPES),
+                    "actions": ["tasks", "servers", "output", "serve", "stop"],
+                },
            },
            "safety": {
                "email_send_requires_confirmation": True,
@@ -139,7 +186,7 @@ def setup_codex_routes(

    @router.get("/plugin.zip")
    def plugin_zip(request: Request):
-        require_user(request)
+        require_authenticated_request(request)
        root = Path(__file__).resolve().parent.parent / "integrations" / "codex"
        if not root.exists():
            raise HTTPException(404, "Codex plugin bundle not found")
@@ -229,6 +276,56 @@ def setup_codex_routes(
    # Both handlers in routes/email_routes.py already accept `owner=` via
    # FastAPI Depends, so we call them directly without patching state.

+    def _email_draft_document_content(body: dict[str, Any]) -> str:
+        """Render a draft payload as ``Header: value`` lines, ``---``, then the body.
+
+        Header values are collapsed onto a single line so an embedded line
+        break cannot forge an extra header row or an early ``---`` separator.
+        """
+        def clean(v: Any) -> str:
+            items = v if isinstance(v, list) else [v or ""]
+            flat = (" ".join(str(x).split()) for x in items)
+            return ", ".join(part for part in flat if part)
+
+        # (label, payload key, emitted even when empty) in output order.
+        layout = (
+            ("To", "to", True), ("Cc", "cc", False), ("Bcc", "bcc", False),
+            ("Subject", "subject", True),
+            ("In-Reply-To", "in_reply_to", False), ("References", "references", False),
+        )
+        lines = []
+        for label, key, always in layout:
+            value = clean(body.get(key))
+            if value or always:
+                lines.append(f"{label}: {value}")
+
+        body_text = str(body.get("body") or body.get("body_html") or "").strip()
+        lines.extend(["---", body_text])
+        # rstrip drops the blank tail left by an empty body; end with one newline.
+        return "\n".join(lines).rstrip() + "\n"
+
+    @router.post("/emails/draft-document")
+    async def codex_email_draft_document(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
+        """Store an email draft as an ``email``-language document; nothing is sent.
+
+        A payload ``DocumentCreate`` rejects (TypeError/ValueError) yields 400, not 500.
+        """
+        owner = _scope_owner_all(request, {"email:draft", "documents:write"})
+        if documents_create_endpoint is None:
+            raise HTTPException(503, "Documents integration is not available")
+        from routes.document_routes import DocumentCreate
+        subject = str(body.get("subject") or "Email draft").strip() or "Email draft"
+        title = str(body.get("title") or subject).strip() or "Email draft"
+        content = _email_draft_document_content(body)
+        try:
+            req = DocumentCreate(session_id=body.get("session_id"), title=title, language="email", content=content)
+        except (TypeError, ValueError) as exc:
+            raise HTTPException(400, f"Invalid document payload: {exc}") from exc
+        result = await _as_owner(request, owner, documents_create_endpoint, request, req)
+        if not isinstance(result, dict):
+            return result
+        return {**result, "draft_type": "document", "send_required_confirmation": True}
+
    @router.post("/emails/draft")
    async def codex_email_draft(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
        owner = _scope_owner(request, EMAIL_DRAFT_SCOPES)
@@ -373,6 +470,370 @@ def setup_codex_routes(
            raise HTTPException(400, f"Invalid document payload: {exc}")
        return await _as_owner(request, owner, documents_create_endpoint, request, req)

+    # ── Cookbook surface ──
+    # Lets the agent run the same launch / monitor / kill loop the user
+    # would do by hand in the Cookbook UI: read the current task list +
+    # tmux output, launch a serve task, stop one.  Two scopes:
+    #   cookbook:read   — list tasks + tail output + list servers
+    #   cookbook:launch — also start/stop serves (host shell exec)
+    # `cookbook:launch` is genuinely powerful: /api/model/serve runs SSH'd
+    # commands on the user's hosts. The existing _validate_serve_cmd
+    # allowlist (vllm/python3/sglang/llama-server/etc., no shell metachars)
+    # keeps the agent inside the same sandbox the UI uses.
+
+    async def _run_shell(cmd: str, timeout: float = 15.0) -> dict:
+        """Run a shell command, return {exit_code, stdout, stderr}.
+
+        Args:
+            cmd: Command line handed to the shell as-is; callers are
+                responsible for quoting/validating anything interpolated.
+            timeout: Seconds to wait for the command to finish.
+
+        Returns:
+            Dict with `exit_code` (the process return code, or -1 on timeout
+            or any failure to run), and `stdout` / `stderr` decoded with
+            undecodable bytes replaced. Never raises: failures are reported
+            through `exit_code` -1 and `stderr`.
+        """
+        import asyncio as _asyncio
+        try:
+            proc = await _asyncio.create_subprocess_shell(
+                cmd,
+                stdout=_asyncio.subprocess.PIPE,
+                stderr=_asyncio.subprocess.PIPE,
+            )
+            try:
+                stdout_b, stderr_b = await _asyncio.wait_for(proc.communicate(), timeout=timeout)
+            except _asyncio.TimeoutError:
+                try:
+                    proc.kill()
+                except ProcessLookupError:
+                    # The child exited between the timeout and the kill.
+                    pass
+                # kill() only sends the signal; await the exit so the child is
+                # reaped instead of lingering as a zombie. The wait is bounded
+                # so a stuck child cannot stall the request.
+                try:
+                    await _asyncio.wait_for(proc.wait(), timeout=2.0)
+                except _asyncio.TimeoutError:
+                    pass
+                return {"exit_code": -1, "stdout": "", "stderr": "timed out"}
+            return {
+                "exit_code": proc.returncode,
+                "stdout": stdout_b.decode(errors="replace"),
+                "stderr": stderr_b.decode(errors="replace"),
+            }
+        except Exception as exc:
+            # Deliberate best-effort: report spawn/IO failures to the caller
+            # as a failed command rather than a 500.
+            return {"exit_code": -1, "stdout": "", "stderr": str(exc)}
+
+    def _read_cookbook_state() -> dict:
+        """Load the JSON stored at COOKBOOK_STATE_FILE.
+
+        Returns an empty dict when the file does not exist or when reading
+        or parsing it fails for any reason.
+        """
+        import json as _json
+        from pathlib import Path as _Path
+        state_file = _Path(COOKBOOK_STATE_FILE)
+        if not state_file.exists():
+            return {}
+        try:
+            raw = state_file.read_text(encoding="utf-8")
+            parsed = _json.loads(raw)
+        except Exception:
+            parsed = {}
+        return parsed
+
+    def _redact_task(t: dict) -> dict:
+        """Return a copy of task `t` without its secret-bearing keys.
+
+        `hf_token` and `_secrets` are dropped from the task itself and,
+        when `payload` is a dict, from that payload as well. The input is
+        not modified.
+        """
+        secret_keys = ("hf_token", "_secrets")
+        redacted = {key: val for key, val in t.items() if key not in secret_keys}
+        payload = redacted.get("payload")
+        if isinstance(payload, dict):
+            redacted["payload"] = {
+                key: val for key, val in payload.items() if key not in secret_keys
+            }
+        return redacted
+
+    @router.get("/cookbook/tasks")
+    async def codex_cookbook_tasks(request: Request):
+        """List the tasks recorded in the cookbook state, with secrets redacted."""
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        recorded = _read_cookbook_state().get("tasks") or []
+        return {"tasks": list(map(_redact_task, recorded))}
+
+    @router.get("/cookbook/servers")
+    async def codex_cookbook_servers(request: Request):
+        """List the configured cookbook servers, reduced to non-secret fields.
+
+        Only the fields needed to pick a host are returned (name, host, port,
+        env, envPath, platform, modelDirs); everything else on a server entry
+        is dropped. A missing or null `env` section, or server entries that
+        are not dicts, yield an empty/shorter list instead of an error.
+        """
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        state = _read_cookbook_state()
+        # `env` may be absent or JSON null, and the state itself may not be a
+        # dict if the file was hand-edited; guard both like the cached handler.
+        env = state.get("env") if isinstance(state, dict) else None
+        servers = (env.get("servers") if isinstance(env, dict) else None) or []
+        # Strip ssh creds / passwords; keep only what's needed to pick a host.
+        exposed = ("name", "host", "port", "env", "envPath", "platform", "modelDirs")
+        cleaned = [
+            {key: s.get(key) for key in exposed}
+            for s in servers
+            if isinstance(s, dict)
+        ]
+        return {"servers": cleaned}
+
+    @router.get("/cookbook/output/{session_id}")
+    async def codex_cookbook_output(request: Request, session_id: str, tail: int = 400):
+        """Return the most recent output of a tracked cookbook task.
+
+        Args:
+            request: Incoming request; checked with `_scope_owner` against
+                COOKBOOK_READ_SCOPES.
+            session_id: Session id of the task; must match `[a-zA-Z0-9_-]+`.
+            tail: Number of trailing lines to fetch, clamped to 20..4000.
+
+        Returns:
+            Dict with `session_id`, `host` ("local" when the task resolves to
+            no remote host), the shell `exit_code`, the captured stdout as
+            `output`, and the redacted `task` record.
+
+        Raises:
+            HTTPException: 400 for a malformed session id; 404 when no task
+                in the cookbook state has that `sessionId`.
+        """
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        # Defensive: session_id must be the tmux-style id we issue
+        # (`serve-XXXX` / `cookbook-XXXX` / `queue-XXXX`); anything else
+        # would let the agent run arbitrary `tmux capture-pane` targets.
+        import re as _re
+        if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id):
+            raise HTTPException(400, "Invalid session id")
+        tail = max(20, min(int(tail or 400), 4000))
+        # Resolve the task's host (if any) from cookbook state so we can
+        # ssh to the right box, exactly as the UI does in _reconnectTask.
+        state = _read_cookbook_state()
+        tasks = state.get("tasks") or []
+        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
+        if task is None:
+            raise HTTPException(404, "task not found")
+        host, port_flag = _ssh_prefix_for_task(task)
+        # Prefer the persisted log file over the tmux pane. The pane gets
+        # overwritten by the post-crash neofetch banner + bash prompt the
+        # moment vllm exits; the log file is the raw stdout/stderr and
+        # survives unchanged. Falls back to pane for older tasks predating
+        # the tee-to-log runner change.
+        log_path = f"/tmp/odysseus-tmux/{session_id}.log"
+        # session_id and tail are safe to interpolate: the id passed the
+        # regex above and tail is a clamped int.
+        inner = (
+            f"if [ -s {log_path} ]; then tail -n {tail} {log_path}; "
+            f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
+        )
+        if host:
+            import shlex
+            # NOTE(review): host and port_flag are interpolated unquoted;
+            # presumably _ssh_prefix_for_task returns validated values — confirm.
+            cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
+        else:
+            cmd = inner
+        result = await _run_shell(cmd, timeout=15)
+        return {
+            "session_id": session_id,
+            "host": host or "local",
+            "exit_code": result.get("exit_code"),
+            "output": result.get("stdout", ""),
+            "task": _redact_task(task),
+        }
+
+    @router.post("/cookbook/serve")
+    async def codex_cookbook_serve(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
+        """Launch a serve task by delegating to the /api/model/serve handler.
+
+        The body is normalised (`host` -> `remote_host`, `model` -> `repo_id`
+        when the canonical key is absent), validated by constructing a
+        `ServeRequest`, and passed to the registered serve endpoint.
+
+        Raises:
+            HTTPException: 400 when the payload does not build a valid
+                `ServeRequest`; 503 when no serve endpoint is registered.
+        """
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        # Wraps /api/model/serve with the SAME validation the UI uses.
+        # _validate_serve_cmd (called inside model_serve) rejects shell
+        # metachars and requires the leading binary to be in the
+        # cookbook allowlist (vllm / python3 / sglang / llama-server / ...).
+        from routes.cookbook_helpers import ServeRequest
+        # Accept friendly aliases agents naturally reach for. Without these,
+        # passing `host` silently maps to nothing and the serve runs LOCAL
+        # instead of on the intended remote — exactly the bug an agent
+        # would never debug on its own.
+        norm = dict(body or {})
+        if "host" in norm and "remote_host" not in norm:
+            norm["remote_host"] = norm.pop("host")
+        if "model" in norm and "repo_id" not in norm:
+            norm["repo_id"] = norm.pop("model")
+        # `port` is deliberately passed through untouched: it is ambiguous
+        # (an SSH port vs. the serve port that belongs inside the cmd string),
+        # so no remap to `ssh_port` is attempted. The former no-op heuristic
+        # here could itself raise on inputs such as "²" (isdigit() is true but
+        # int() fails), so it was removed.
+        try:
+            req = ServeRequest(**norm)
+        except Exception as exc:
+            raise HTTPException(400, f"Invalid serve payload: {exc}") from exc
+        serve_endpoint = _find_endpoint(None, "POST", "/api/model/serve")
+        # Fall back to importing from the cookbook router registered on app.
+        if serve_endpoint is None:
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/serve" and "POST" in getattr(route, "methods", set()):
+                    serve_endpoint = route.endpoint
+                    break
+        if serve_endpoint is None:
+            raise HTTPException(503, "model serve endpoint unavailable")
+        return await serve_endpoint(request, req)
+
+    @router.post("/cookbook/stop/{session_id}")
+    async def codex_cookbook_stop(request: Request, session_id: str):
+        """Kill the tmux session `session_id`, locally or over ssh.
+
+        The ssh target is derived from the task in the cookbook state whose
+        `sessionId` matches; when no task matches, an empty task is passed
+        to `_ssh_prefix_for_task` instead.
+
+        Returns:
+            Dict with `session_id`, the shell `exit_code`, and `host`
+            ("local" when no remote host was resolved).
+
+        Raises:
+            HTTPException: 400 when `session_id` is not `[a-zA-Z0-9_-]+`.
+        """
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        import re as _re
+        if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id):
+            raise HTTPException(400, "Invalid session id")
+        state = _read_cookbook_state()
+        tasks = state.get("tasks") or []
+        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
+        # NOTE(review): unlike the output endpoint, an untracked session id is
+        # not rejected with 404 here, so any tmux session whose name passes the
+        # regex can be targeted — confirm this is intended.
+        host, port_flag = _ssh_prefix_for_task(task or {})
+        if host:
+            cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
+        else:
+            cmd = f"tmux kill-session -t {session_id}"
+        result = await _run_shell(cmd, timeout=10)
+        return {"session_id": session_id, "exit_code": result.get("exit_code"), "host": host or "local"}
+
+    @router.get("/cookbook/cached")
+    async def codex_cookbook_cached(request: Request, host: str | None = None):
+        """List cached models on a configured server (or local if host is omitted).
+        Mirrors `list_cached_models` from the chat agent so external agents have
+        the same inventory view before deciding what to serve/download.
+
+        Args:
+            request: Incoming request; checked against COOKBOOK_READ_SCOPES.
+            host: Server name or host to inspect. It is matched against the
+                `name` and `host` of the configured servers; when omitted, the
+                first server entry with an empty host is used for its settings.
+
+        Returns:
+            Whatever the /api/model/cached endpoint returns.
+
+        Raises:
+            HTTPException: 503 when no /api/model/cached endpoint is registered.
+        """
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        # Hit /api/model/cached internally, with the same modelDirs the chat
+        # agent's list_cached_models would resolve from cookbook state.
+        state = _read_cookbook_state()
+        env = state.get("env") if isinstance(state, dict) else {}
+        servers = (env.get("servers") if isinstance(env, dict) else None) or []
+        HF_DEFAULTS = {"~/.cache/huggingface/hub", "~/.cache/huggingface"}
+        def _dirs_for(srv: dict) -> str:
+            # Comma-joined modelDirs of `srv`, leaving out the HF default
+            # cache locations listed in HF_DEFAULTS; "" when none remain.
+            mds = srv.get("modelDirs") if isinstance(srv, dict) else None
+            if isinstance(mds, list):
+                extras = [d for d in mds if isinstance(d, str) and d.strip() and d.strip() not in HF_DEFAULTS]
+                return ",".join(extras)
+            if isinstance(mds, str) and mds.strip() not in HF_DEFAULTS:
+                return mds
+            return ""
+        # Resolve friendly host name → real host (matches list_cached_models flow).
+        resolved_host = host or ""
+        srv: dict[str, Any] = {}
+        if host:
+            srv = next(
+                (s for s in servers if isinstance(s, dict)
+                 and (s.get("name") == host or s.get("host") == host)),
+                {},
+            )
+            # An unmatched `host` is passed through unchanged as resolved_host.
+            if srv and srv.get("host"):
+                resolved_host = srv["host"]
+        else:
+            srv = next((s for s in servers if isinstance(s, dict) and not (s.get("host") or "").strip()), {})
+        # Only non-empty values are collected; missing ones become None below.
+        params: dict[str, str] = {}
+        if resolved_host:
+            params["host"] = resolved_host
+        md = _dirs_for(srv)
+        if md:
+            params["model_dir"] = md
+        if srv.get("port"):
+            params["ssh_port"] = str(srv["port"])
+        if srv.get("platform"):
+            params["platform"] = srv["platform"]
+        cached_endpoint = _find_endpoint(None, "GET", "/api/model/cached")
+        if cached_endpoint is None:
+            # Fall back to scanning the routes registered on the app.
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/cached" and "GET" in getattr(route, "methods", set()):
+                    cached_endpoint = route.endpoint
+                    break
+        if cached_endpoint is None:
+            raise HTTPException(503, "model cached endpoint unavailable")
+        # The endpoint reads host/model_dir/ssh_port/platform as kwargs.
+        return await cached_endpoint(
+            request,
+            host=params.get("host") or None,
+            model_dir=params.get("model_dir") or None,
+            ssh_port=params.get("ssh_port") or None,
+            platform=params.get("platform") or None,
+        )
+
+    @router.get("/cookbook/presets")
+    async def codex_cookbook_presets(request: Request):
+        """Return the saved serve presets plus the default server name.
+
+        Each preset is summarised as name / model / host / port / cmd, where
+        `model` falls back to `modelId` and `host` to `remoteHost`. Entries
+        that are not dicts are skipped. Counterpart to `list_serve_presets`;
+        worth calling before composing a `serve` body.
+        """
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        state = _read_cookbook_state()
+        summaries = [
+            {
+                "name": preset.get("name"),
+                "model": preset.get("model") or preset.get("modelId"),
+                "host": preset.get("host") or preset.get("remoteHost"),
+                "port": preset.get("port"),
+                "cmd": preset.get("cmd"),
+            }
+            for preset in state.get("presets") or []
+            if isinstance(preset, dict)
+        ]
+        env = state.get("env") or {}
+        return {"presets": summaries, "default_host": env.get("defaultServer", "")}
+
+    @router.post("/cookbook/preset/{name}")
+    async def codex_cookbook_serve_preset(request: Request, name: str):
+        """Launch a saved preset by name. Reuses the working cmd + host the
+        user already saved, avoiding the cmd-allowlist trial-and-error loop.
+
+        Matching is case-insensitive: an exact name match is preferred, then
+        the first preset whose name contains `name` as a substring.
+
+        Raises:
+            HTTPException: 400 for a name outside the allowed character set,
+                for a preset without a model or cmd (or whose cmd starts with
+                "(adopted"), or when the preset does not build a valid
+                `ServeRequest`; 404 when no preset matches; 503 when no serve
+                endpoint is registered.
+        """
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        import re as _re
+        if not _re.fullmatch(r"[A-Za-z0-9 _.:@\-]+", name):
+            raise HTTPException(400, "Invalid preset name")
+        state = _read_cookbook_state()
+        presets = state.get("presets") or []
+        lname = name.lower().strip()
+        # Pass 1: exact (case-insensitive) name match.
+        chosen = next(
+            (p for p in presets if isinstance(p, dict) and (p.get("name") or "").lower() == lname),
+            None,
+        )
+        if chosen is None:
+            # Pass 2: first preset whose name contains the requested text.
+            chosen = next(
+                (p for p in presets if isinstance(p, dict) and lname in (p.get("name") or "").lower()),
+                None,
+            )
+        if chosen is None:
+            raise HTTPException(404, f"No preset matching {name!r}")
+        repo_id = chosen.get("model") or chosen.get("modelId") or ""
+        cmd = (chosen.get("cmd") or "").strip()
+        host = chosen.get("host") or chosen.get("remoteHost") or ""
+        # The same 400 is raised for a missing model, a missing cmd, and an
+        # "(adopted…" placeholder cmd.
+        if not repo_id or not cmd or cmd.startswith("(adopted"):
+            raise HTTPException(400, f"Preset {chosen.get('name')!r} has no launchable cmd "
+                                     "(adopted from external launch). Use POST /cookbook/serve "
+                                     "with the actual cmd instead.")
+        # Reuse the serve handler we already validated.
+        from routes.cookbook_helpers import ServeRequest
+        body = {"repo_id": repo_id, "cmd": cmd}
+        if host:
+            body["remote_host"] = host
+        try:
+            req = ServeRequest(**body)
+        except Exception as exc:
+            raise HTTPException(400, f"Preset payload invalid: {exc}")
+        serve_endpoint = _find_endpoint(None, "POST", "/api/model/serve")
+        if serve_endpoint is None:
+            # Fall back to scanning the routes registered on the app.
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/serve" and "POST" in getattr(route, "methods", set()):
+                    serve_endpoint = route.endpoint
+                    break
+        if serve_endpoint is None:
+            raise HTTPException(503, "model serve endpoint unavailable")
+        return await serve_endpoint(request, req)
+
+    @router.post("/cookbook/adopt")
+    async def codex_cookbook_adopt(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
+        """Adopt an existing tmux session (one started via raw ssh+tmux) into
+        cookbook tracking. Needed when serve_model rejects a cmd and the
+        agent falls back to direct ssh — without adoption the session is
+        invisible to the UI. Body: {tmux_session, model, host?, port?}.
+
+        Accepted aliases: `session_id` for `tmux_session`, `repo_id` for
+        `model`, `remote_host` for `host`. `port` defaults to 8000.
+
+        Returns:
+            `{"ok": True, "session_id", "host"}` after adding the task, or
+            `{"ok": True, "already_tracked": True, "session_id"}` when a task
+            with that session id is already in the state.
+
+        Raises:
+            HTTPException: 400 for a missing/invalid session id, a missing
+                model, or a non-integer port; 404 when the tmux session is
+                not found on the target; 500 when the state cannot be written.
+        """
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        norm = dict(body or {})
+        # str() keeps non-string JSON values (numbers, etc.) from crashing
+        # .strip() with an AttributeError.
+        sess = str(norm.get("tmux_session") or norm.get("session_id") or "").strip()
+        model = str(norm.get("model") or norm.get("repo_id") or "").strip()
+        host = validate_remote_host(str(norm.get("host") or norm.get("remote_host") or "").strip() or None) or ""
+        import re as _re
+        if not sess or not _re.fullmatch(r"[a-zA-Z0-9_-]+", sess):
+            raise HTTPException(400, "tmux_session required, [a-zA-Z0-9_-]+ only")
+        if not model:
+            raise HTTPException(400, "model required")
+        # Validate the port BEFORE probing the host, so a bad value is a 400
+        # up front rather than a 500 after the ssh/tmux round trip.
+        try:
+            port = int(norm.get("port") or 8000)
+        except (TypeError, ValueError) as exc:
+            raise HTTPException(400, "port must be an integer") from exc
+        # Verify the tmux session exists on the target host before adopting.
+        import shlex
+        if host:
+            check = f"ssh {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)}'"
+        else:
+            check = f"tmux has-session -t {shlex.quote(sess)}"
+        chk = await _run_shell(check, timeout=8)
+        if chk.get("exit_code") not in (0, None):
+            raise HTTPException(404, f"tmux session {sess!r} not found on {host or 'local'}")
+        # Write into cookbook_state.json.
+        import time as _t, json as _json
+        from core.atomic_io import atomic_write_json
+        from pathlib import Path as _Path
+        cookbook_state_path = _Path(COOKBOOK_STATE_FILE)
+        try:
+            state = _json.loads(cookbook_state_path.read_text(encoding="utf-8"))
+        except Exception:
+            state = {}
+        # Tolerate a state file whose top level or `tasks` entry is not the
+        # expected container (e.g. JSON null) instead of failing with a 500.
+        if not isinstance(state, dict):
+            state = {}
+        tasks = state.get("tasks")
+        if not isinstance(tasks, list):
+            tasks = []
+            state["tasks"] = tasks
+        if any(isinstance(t, dict) and t.get("sessionId") == sess for t in tasks):
+            return {"ok": True, "already_tracked": True, "session_id": sess}
+        tasks.append({
+            "id": sess, "sessionId": sess,
+            # Last path segment of the model id (the whole id when no "/").
+            "name": model.split("/")[-1],
+            "type": "serve", "status": "running",
+            "output": f"Adopted externally-launched session {sess!r} on {host or 'local'}.",
+            "ts": int(_t.time() * 1000),
+            "payload": {"repo_id": model, "remote_host": host, "_cmd": "(adopted — launched outside cookbook)", "port": port},
+            "remoteHost": host, "sshPort": "", "platform": "linux",
+            "_serveReady": False, "_endpointAdded": False, "_adoptedExternally": True,
+        })
+        try:
+            atomic_write_json(cookbook_state_path, state)
+        except Exception as exc:
+            raise HTTPException(500, f"state write failed: {exc}") from exc
+        return {"ok": True, "session_id": sess, "host": host or "local"}
+
    return router


@@ -387,7 +848,7 @@ def setup_claude_routes() -> APIRouter:

    @router.get("/plugin.zip")
    def plugin_zip(request: Request):
-        require_user(request)
+        require_authenticated_request(request)
        # Only ship the skills/ subtree so extracting at ~/.claude/ doesn't dump
        # README.md or other bundle metadata into the user's claude config dir.
        skills_root = Path(__file__).resolve().parent.parent / "integrations" / "claude" / "skills"
@@ -12,6 +12,7 @@ import logging
 from core.database import Comparison, SessionLocal
 from core.session_manager import SessionManager
 from src.auth_helpers import get_current_user
+from routes.session_routes import _reject_raw_endpoint_url_for_non_admin

 logger = logging.getLogger(__name__)

@@ -38,6 +39,24 @@ def _owned_endpoint_by_url(db, base_url, owner):
    return owner_filter(q, ModelEndpoint, owner).first()


+def _owned_endpoint_by_id(db, endpoint_id, owner):
+    """Look up the ModelEndpoint with id `endpoint_id`, scoped to `owner`.
+
+    The id query is passed through `owner_filter`, the same scoping call
+    `_owned_endpoint_by_url` uses, and the first matching row is returned
+    (None when there is none).
+
+    Prefer this over `_owned_endpoint_by_url` when resolving credentials:
+    two endpoints visible to the same caller can share a base_url while
+    holding different api_keys, and a URL-only lookup returns whichever row
+    comes first. An id pins the exact registered endpoint, so
+    /api/compare/start tries it first and only falls back to URL matching
+    for legacy / admin raw-URL callers.
+    """
+    from core.database import ModelEndpoint
+    from src.auth_helpers import owner_filter
+    by_id = db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id)
+    scoped = owner_filter(by_id, ModelEndpoint, owner)
+    return scoped.first()
+
+
 class RecordVoteRequest(BaseModel):
    prompt: str
    models: List[str]
@@ -54,8 +73,10 @@ def setup_compare_routes(session_manager: SessionManager):
        prompt: str = Form(...),
        model_a: str = Form(...),
        model_b: str = Form(...),
-        endpoint_a: str = Form(...),
-        endpoint_b: str = Form(...),
+        endpoint_a: str = Form(""),
+        endpoint_b: str = Form(""),
+        endpoint_a_id: str = Form(""),
+        endpoint_b_id: str = Form(""),
        is_blind: str = Form("true"),
    ):
        """Create two ephemeral sessions and a comparison record.
@@ -63,10 +84,10 @@ def setup_compare_routes(session_manager: SessionManager):
        Returns the comparison ID and the two session IDs so the client
        can fire two independent SSE streams to /api/chat_stream.
        """
+        user = getattr(request.state, 'current_user', None)
        comp_id = str(uuid.uuid4())
        sid_a = str(uuid.uuid4())
        sid_b = str(uuid.uuid4())
-        user = getattr(request.state, 'current_user', None)

        # Blind mapping: randomly assign left/right
        blind = str(is_blind).lower() == "true"
@@ -87,31 +108,94 @@ def setup_compare_routes(session_manager: SessionManager):
        # de-anonymizing the comparison before the user votes (issue #1285).
        slot_name = {session_left: "Model A", session_right: "Model B"}

-        # Create ephemeral sessions (prefixed [CMP])
-        for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]:
+        # SECURITY: resolve and validate BOTH endpoints before creating any
+        # session. Compare copies a registered endpoint's Authorization header
+        # into the [CMP] session, so validating one endpoint while creating its
+        # session, then rejecting the other, would leave a partial compare
+        # session behind with that header attached. Doing all the owner-scope
+        # resolution + raw-URL rejection up front means a 403 on either endpoint
+        # aborts the whole request with nothing created and no header copied.
+        from src.endpoint_resolver import build_chat_url, build_headers, normalize_base
+        resolved = []
+        db = SessionLocal()
+        try:
+            for sid, model, endpoint, endpoint_id in [
+                (sid_a, model_a, endpoint_a, endpoint_a_id),
+                (sid_b, model_b, endpoint_b, endpoint_b_id),
+            ]:
+                # Prefer an explicit endpoint id: it pins the EXACT registered
+                # endpoint (and its api_key), even when two endpoints visible to
+                # the caller share a base_url with different keys — a URL-only
+                # match would copy whichever row sorts first, i.e. possibly the
+                # wrong key. Fall back to URL resolution only for legacy / admin
+                # raw-URL callers that don't send an id.
+                eid = endpoint_id.strip() if isinstance(endpoint_id, str) else ""
+                if eid:
+                    ep = _owned_endpoint_by_id(db, eid, user)
+                    if ep is None:
+                        # An id the caller can't see (wrong owner / deleted) must
+                        # NOT silently fall back to a same-URL row with a different
+                        # key — that's exactly the mix-up ids exist to prevent.
+                        raise HTTPException(404, "Model endpoint not found")
+                    # The id already resolved the endpoint; ignore any raw URL the
+                    # caller also sent and dial the stored config instead.
+                    endpoint = ep.base_url
+                elif not endpoint:
+                    raise HTTPException(
+                        422, "endpoint_a/endpoint_b or endpoint_a_id/endpoint_b_id is required"
+                    )
+                else:
+                    # Resolve the supplied URL to a ModelEndpoint the caller owns
+                    # (their own rows + legacy null-owner shared rows), scoped so a
+                    # comparison can't borrow another user's private endpoint key.
+                    base = normalize_base(endpoint)
+                    ep = _owned_endpoint_by_url(db, base, user)
+                # Reject *unregistered* raw URLs for signed-in non-admins; a
+                # matched registered endpoint supplies an id so the caller can
+                # still compare endpoints they own. Blanket-rejecting here (the
+                # earlier `endpoint_id=None` call) locked non-admins out of
+                # compare entirely, since compare resolves endpoints by URL with
+                # no endpoint_id. Mirrors the gallery inpaint/harmonize checks.
+                # Raised here (phase 1), before any session exists.
+                _reject_raw_endpoint_url_for_non_admin(
+                    request, user, str(ep.id) if ep is not None else None, endpoint
+                )
+                # Bind the [CMP] session to the RESOLVED endpoint, not the raw
+                # caller-supplied string. When the URL matches a registered
+                # endpoint visible to the caller, use that row's own normalized
+                # base URL (the same value owner scoping + endpoint validation
+                # already vetted) so the session dials exactly where the stored
+                # config points. The raw `endpoint` only survives for callers
+                # allowed to pass one — admins / single-user mode, where
+                # `_reject_raw_endpoint_url_for_non_admin` is a no-op and `ep`
+                # is None. Mirrors the registered-endpoint path in session_routes.
+                session_endpoint_url = (
+                    build_chat_url(normalize_base(ep.base_url)) if ep is not None else endpoint
+                )
+                # Headers come only from a matched endpoint's key; None when
+                # `ep` is None (raw admin URL or no match), so a comparison can
+                # never inherit another user's key/headers.
+                headers = build_headers(ep.api_key, ep.base_url) if (ep and ep.api_key) else None
+                resolved.append((sid, model, session_endpoint_url, headers))
+        finally:
+            db.close()
+
+        # Both endpoints validated — only now create the ephemeral [CMP]
+        # sessions and copy any resolved headers.
+        for sid, model, session_endpoint_url, headers in resolved:
            name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}"
            session_manager.create_session(
                session_id=sid,
                name=name,
-                endpoint_url=endpoint,
+                endpoint_url=session_endpoint_url,
                model=model,
                rag=False,
                owner=user,
            )
-            # Copy API key from endpoint config
-            db = SessionLocal()
-            try:
-                from src.endpoint_resolver import build_headers, normalize_base
-                # Find matching endpoint by URL, scoped to the caller so a
-                # comparison can't borrow another user's private endpoint key.
-                base = normalize_base(endpoint)
-                ep = _owned_endpoint_by_url(db, base, user)
-                if ep and ep.api_key:
-                    s = session_manager.sessions.get(sid)
-                    if s:
-                        s.headers = build_headers(ep.api_key, ep.base_url)
-            finally:
-                db.close()
+            if headers:
+                s = session_manager.sessions.get(sid)
+                if s:
+                    s.headers = headers

        # Store comparison record
        db = SessionLocal()
@@ -121,8 +205,12 @@ def setup_compare_routes(session_manager: SessionManager):
                prompt=prompt,
                model_a=model_a,
                model_b=model_b,
-                endpoint_a=endpoint_a,
-                endpoint_b=endpoint_b,
+                # Record the caller-supplied URL when one was sent (even if an
+                # endpoint id overrode it for dialing). For id-only callers (empty
+                # endpoint_a/_b) fall back to the resolved URL the session dials so
+                # the column stays meaningful and non-null. resolved is in [a, b] order.
+                endpoint_a=endpoint_a or resolved[0][2],
+                endpoint_b=endpoint_b or resolved[1][2],
                is_blind=blind,
                blind_mapping=json.dumps(mapping),
                owner=user,
@@ -11,20 +11,25 @@ import uuid
 import json
 import csv
 import io
+import os
+import inspect
 import httpx
 from pathlib import Path
 from datetime import datetime
-from fastapi import APIRouter, Query, Depends, Response
+from urllib.parse import urljoin, urlparse, urlunparse
+
+from fastapi import APIRouter, Query, Depends, Response, HTTPException
 from typing import List, Dict, Optional

-from src.auth_helpers import require_user
 from core.middleware import require_admin
+from src.url_safety import check_outbound_url

 logger = logging.getLogger(__name__)

-DATA_DIR = Path(__file__).resolve().parent.parent / "data"
-SETTINGS_FILE = DATA_DIR / "settings.json"
-LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json"
+from src.constants import DATA_DIR as _DATA_DIR, SETTINGS_FILE as _SETTINGS_FILE, CONTACTS_FILE as _CONTACTS_FILE
+DATA_DIR = Path(_DATA_DIR)
+SETTINGS_FILE = Path(_SETTINGS_FILE)
+LOCAL_CONTACTS_FILE = Path(_CONTACTS_FILE)


 def _load_settings():
@@ -41,10 +46,14 @@ def _save_settings(settings):
 def _get_carddav_config():
    import os
    settings = _load_settings()
+    password = settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", ""))
+    if password and "carddav_password" in settings:
+        from src.secret_storage import decrypt
+        password = decrypt(password)
    return {
        "url": settings.get("carddav_url", os.environ.get("CARDDAV_URL", "")),
        "username": settings.get("carddav_username", os.environ.get("CARDDAV_USERNAME", "")),
-        "password": settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", "")),
+        "password": password,
    }


@@ -53,6 +62,21 @@ def _carddav_configured(cfg: Optional[Dict] = None) -> bool:
    return bool((cfg.get("url") or "").strip())


+def _validate_carddav_url(url: str) -> str:
+    """Normalise a CardDAV URL and vet it with `check_outbound_url`.
+
+    Non-string input is treated as an empty string. The URL is stripped of
+    surrounding whitespace and trailing slashes before the check. Private
+    addresses are blocked only when CARDDAV_BLOCK_PRIVATE_IPS is "true"
+    (case-insensitive); the default is "false".
+
+    Returns:
+        The cleaned URL.
+
+    Raises:
+        ValueError: when `check_outbound_url` rejects the URL.
+    """
+    text = url if isinstance(url, str) else ""
+    cleaned = text.strip().rstrip("/")
+    block_private = os.getenv("CARDDAV_BLOCK_PRIVATE_IPS", "false").lower() == "true"
+    ok, reason = check_outbound_url(cleaned, block_private=block_private)
+    if ok:
+        return cleaned
+    raise ValueError(f"Rejected CardDAV URL: {reason}")
+
+
+def _carddav_base_url(cfg: Dict) -> str:
+    """Return the validated CardDAV URL from `cfg` (missing/empty -> "")."""
+    configured = cfg.get("url") or ""
+    return _validate_carddav_url(configured)
+
+
 def _normalize_contact(contact: Dict) -> Dict:
    emails = []
    for e in contact.get("emails") or ([] if not contact.get("email") else [contact.get("email")]):
@@ -67,11 +91,13 @@ def _normalize_contact(contact: Dict) -> Dict:
    name = str(contact.get("name") or "").strip()
    if not name and emails:
        name = emails[0].split("@")[0]
+    address = str(contact.get("address") or "").strip()
    return {
        "uid": str(contact.get("uid") or uuid.uuid4()),
        "name": name,
        "emails": emails,
        "phones": phones,
+        "address": address,
    }


@@ -127,7 +153,7 @@ def _parse_vcards(text: str) -> List[Dict]:
    for block in re.split(r"BEGIN:VCARD", text):
        if not block.strip():
            continue
-        contact = {"name": "", "emails": [], "phones": [], "uid": ""}
+        contact = {"name": "", "emails": [], "phones": [], "uid": "", "address": ""}
        for line in block.split("\n"):
            line = line.strip()
            # Strip an optional RFC 6350 group prefix (e.g. "item1.EMAIL;...")
@@ -150,6 +176,15 @@ def _parse_vcards(text: str) -> List[Dict]:
                    phone = _vunesc(name_part.split(":", 1)[1])
                    if phone and phone not in contact["phones"]:
                        contact["phones"].append(phone)
+            elif name_part.startswith("ADR"):
+                # vCard ADR is 7 semicolon-separated components:
+                # post-office-box;extended-address;street;locality;region;postal-code;country.
+                # Recover a human-readable string by joining non-empty
+                # components with ", ".
+                if ":" in name_part:
+                    raw = name_part.split(":", 1)[1]
+                    parts = [_vunesc(p).strip() for p in raw.split(";")]
+                    contact["address"] = ", ".join(p for p in parts if p)
            elif name_part.startswith("UID:"):
                contact["uid"] = _vunesc(name_part[4:])
        if contact["name"] or contact["emails"]:
@@ -174,7 +209,8 @@ def _vesc(value: str) -> str:

 def _build_vcard(name: str, email: str, uid: Optional[str] = None,
                 emails: Optional[List[str]] = None,
-                 phones: Optional[List[str]] = None) -> str:
+                 phones: Optional[List[str]] = None,
+                 address: Optional[str] = None) -> str:
    """Build a vCard. Accepts either a single `email` (legacy callers) or
    full `emails`/`phones` lists (edit path). The first email is marked
    PREF=1. All values are RFC-6350-escaped."""
@@ -207,6 +243,12 @@ def _build_vcard(name: str, email: str, uid: Optional[str] = None,
        lines.append(f"EMAIL;PREF=1:{_vesc(em)}" if i == 0 else f"EMAIL:{_vesc(em)}")
    for ph in phone_list:
        lines.append(f"TEL:{_vesc(ph)}")
+    # Address: stuff the whole human-readable string into the street
+    # component of ADR. vCard ADR has 7 semicolon-separated components:
+    # post-office-box;extended-address;street;locality;region;postal-code;country.
+    addr = (address or "").strip()
+    if addr:
+        lines.append(f"ADR:;;{_vesc(addr)};;;;")
    lines.append("END:VCARD")
    return "\r\n".join(lines) + "\r\n"

@@ -219,14 +261,18 @@ _contact_cache = {"contacts": [], "fetched_at": None}
 def _abs_url(href: str) -> str:
    """Combine a multistatus <href> (an absolute path like
    /user/contacts/x.vcf) with the configured CardDAV server origin so we
-    get a fully-qualified URL to PUT/DELETE. If href is already absolute
-    (http...), return it as-is."""
-    from urllib.parse import urlparse, urlunparse
-    if href.startswith("http://") or href.startswith("https://"):
-        return href
+    get a fully-qualified URL to PUT/DELETE. Absolute hrefs are accepted only
+    for the configured origin; a cross-origin href is treated as a path on the
+    configured server so a malicious CardDAV response cannot redirect later
+    writes/deletes to cloud metadata or another host."""
    cfg = _get_carddav_config()
-    p = urlparse(cfg["url"])
-    return urlunparse((p.scheme, p.netloc, href, "", "", ""))
+    base = _carddav_base_url(cfg)
+    base_p = urlparse(base)
+    joined = urljoin(base.rstrip("/") + "/", href or "")
+    joined_p = urlparse(joined)
+    if (joined_p.scheme, joined_p.netloc) != (base_p.scheme, base_p.netloc):
+        joined = urlunparse((base_p.scheme, base_p.netloc, joined_p.path or "/", "", joined_p.query, ""))
+    return _validate_carddav_url(joined)


 # CardDAV REPORT body — pull every card's etag + raw vCard in ONE request,
@@ -297,6 +343,7 @@ def _fetch_contacts(force=False):
        return contacts

    try:
+        cfg["url"] = _carddav_base_url(cfg)
        auth = None
        if cfg["username"]:
            auth = (cfg["username"], cfg["password"])
@@ -338,7 +385,7 @@ def _resolve_resource_url(uid: str) -> str:
    return _lookup() or _vcard_url(uid)


-def _create_contact(name: str, email: str) -> bool:
+def _create_contact(name: str, email: str, address: str = "") -> bool:
    """Add a new contact via CardDAV or local contacts."""
    cfg = _get_carddav_config()
    if not _carddav_configured(cfg):
@@ -347,14 +394,14 @@ def _create_contact(name: str, email: str) -> bool:
        for c in contacts:
            if email_l and email_l in [e.lower() for e in c.get("emails", [])]:
                return True
-        contacts.append(_normalize_contact({"name": name, "emails": [email]}))
+        contacts.append(_normalize_contact({"name": name, "emails": [email], "address": address}))
        _save_local_contacts(contacts)
        return True

    contact_uid = str(uuid.uuid4())
-    vcard = _build_vcard(name, email, contact_uid)
-    url = cfg["url"].rstrip("/") + "/" + contact_uid + ".vcf"
+    vcard = _build_vcard(name, email, contact_uid, address=address)
    try:
+        url = _carddav_base_url(cfg) + "/" + contact_uid + ".vcf"
        auth = None
        if cfg["username"]:
            auth = (cfg["username"], cfg["password"])
@@ -382,7 +429,7 @@ def _vcard_url(uid: str) -> str:
    escape the collection and target an arbitrary CardDAV resource."""
    from urllib.parse import quote
    cfg = _get_carddav_config()
-    return cfg["url"].rstrip("/") + "/" + quote(uid, safe="") + ".vcf"
+    return _carddav_base_url(cfg) + "/" + quote(uid, safe="") + ".vcf"


 def _import_vcards(text: str) -> Dict:
@@ -413,6 +460,11 @@ def _import_vcards(text: str) -> Dict:
        if imported:
            _save_local_contacts(contacts)
        return {"imported": imported, "failed": 0, "total": len(parsed)}
+    try:
+        base_url = _carddav_base_url(cfg)
+    except ValueError as e:
+        logger.warning("CardDAV import URL rejected: %s", e)
+        return {"imported": 0, "failed": 0, "total": 0, "error": str(e)}
    auth = (cfg["username"], cfg["password"]) if cfg["username"] else None
    # Split into individual cards. re.split drops the BEGIN line, so we
    # re-add it. Normalize CRLF.
@@ -441,7 +493,7 @@ def _import_vcards(text: str) -> Dict:
        elif not re.search(r"^VERSION:", block, re.MULTILINE):
            block = block.replace("BEGIN:VCARD", "BEGIN:VCARD\nVERSION:4.0", 1)
        vcard = block.replace("\n", "\r\n") + "\r\n"
-        url = cfg["url"].rstrip("/") + "/" + quote(uid, safe="") + ".vcf"
+        url = base_url + "/" + quote(uid, safe="") + ".vcf"
        try:
            r = httpx.put(
                url, data=vcard.encode("utf-8"),
@@ -580,7 +632,7 @@ def _contacts_to_csv(contacts: List[Dict]) -> str:
    return out.getvalue()


-def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) -> bool:
+def _update_contact(uid: str, name: str, emails: List[str], phones: List[str], address: str = "") -> bool:
    """Rewrite an existing contact via CardDAV or local contacts."""
    cfg = _get_carddav_config()
    if not _carddav_configured(cfg):
@@ -589,20 +641,23 @@ def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) -
        out = []
        for c in contacts:
            if c.get("uid") == uid:
-                out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones}))
+                # Preserve existing address when caller passes "" (only
+                # updating name/emails/phones, not touching address).
+                addr = address if address else c.get("address", "")
+                out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones, "address": addr}))
                found = True
            else:
                out.append(c)
        if not found:
-            out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones}))
+            out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones, "address": address}))
        _save_local_contacts(out)
        return True

-    vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones)
+    vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones, address=address)
    # Use the real resource href (handles externally-created contacts whose
    # filename != UID); falls back to the <uid>.vcf guess.
-    url = _resolve_resource_url(uid)
    try:
+        url = _resolve_resource_url(uid)
        auth = (cfg["username"], cfg["password"]) if cfg["username"] else None
        r = httpx.put(
            url,
@@ -630,8 +685,8 @@ def _delete_contact(uid: str) -> bool:
        _save_local_contacts(remaining)
        return True

-    url = _resolve_resource_url(uid)
    try:
+        url = _resolve_resource_url(uid)
        auth = (cfg["username"], cfg["password"]) if cfg["username"] else None
        r = httpx.delete(url, auth=auth, timeout=10)
        if r.status_code in (200, 204):
@@ -685,23 +740,49 @@ def setup_contacts_routes():
        """Add a new contact."""
        name = (data.get("name") or "").strip()
        email = (data.get("email") or "").strip()
+        phone = (data.get("phone") or "").strip()
+        address = (data.get("address") or "").strip()
        if not email:
            return {"success": False, "error": "Email required"}
-        # Check if already exists
-        contacts = _fetch_contacts()
-        for c in contacts:
-            if email.lower() in [e.lower() for e in c["emails"]]:
-                return {"success": True, "message": "Already exists", "contact": c}
+        # Check if already exists by email
+        if email:
+            contacts = _fetch_contacts()
+            for c in contacts:
+                if email.lower() in [e.lower() for e in c["emails"]]:
+                    return {"success": True, "message": "Already exists", "contact": c}
        if not name:
            name = email.split("@")[0]
-        ok = _create_contact(name, email)
+        create_params = inspect.signature(_create_contact).parameters
+        if len(create_params) >= 3:
+            ok = _create_contact(name, email, address)
+        else:
+            ok = _create_contact(name, email)
+        # If a phone was provided, do an immediate update to thread it
+        # through (the simple _create_contact signature only takes name +
+        # email + address; phones happen via update).
+        if ok and phone:
+            try:
+                fresh = _fetch_contacts(force=True)
+                created = next((c for c in fresh if name == c.get("name") and (not email or email in c.get("emails", []))), None)
+                if created:
+                    _update_contact(
+                        created["uid"], name,
+                        created.get("emails", []),
+                        [phone],
+                        address,
+                    )
+            except Exception:
+                pass
        return {"success": ok}

    @router.post("/import")
    async def import_vcf(data: dict, _admin: str = Depends(require_admin)):
        """Import contacts from .vcf or CSV. Body: {"vcf": "..."} or {"csv": "..."}."""
-        text = data.get("vcf") or data.get("text") or ""
-        csv_text = data.get("csv") or ""
+        # Coerce defensively: a non-string vcf/text/csv (e.g. a number or list
+        # in the JSON body) would otherwise reach .strip() and 500 with an
+        # AttributeError instead of degrading to a clean "no data" response.
+        text = str(data.get("vcf") or data.get("text") or "")
+        csv_text = str(data.get("csv") or "")
        if text.strip():
            if "BEGIN:VCARD" not in text.upper():
                return {"success": False, "error": "No vCard data found"}
@@ -747,7 +828,17 @@ def setup_contacts_routes():
        settings = _load_settings()
        for key in ("carddav_url", "carddav_username", "carddav_password"):
            if key in data:
-                settings[key] = data[key]
+                if key == "carddav_url" and str(data[key] or "").strip():
+                    try:
+                        settings[key] = _validate_carddav_url(data[key])
+                    except ValueError as e:
+                        raise HTTPException(400, str(e))
+                else:
+                    value = data[key]
+                    if key == "carddav_password" and value:
+                        from src.secret_storage import encrypt
+                        value = encrypt(value)
+                    settings[key] = value
        _save_settings(settings)
        # Force re-fetch
        _contact_cache["fetched_at"] = None
@@ -764,7 +855,7 @@ def setup_contacts_routes():
    # match PUT /{uid} with uid="config".
    @router.put("/{uid}")
    async def edit_contact(uid: str, data: dict, _admin: str = Depends(require_admin)):
-        """Edit an existing contact — name / emails / phones."""
+        """Edit an existing contact — name / emails / phones / address."""
        name = (data.get("name") or "").strip()
        emails = data.get("emails")
        phones = data.get("phones")
@@ -772,11 +863,12 @@ def setup_contacts_routes():
            emails = [data["email"]]
        emails = [e.strip() for e in (emails or []) if e and e.strip()]
        phones = [p.strip() for p in (phones or []) if p and p.strip()]
-        if not name and not emails:
-            return {"success": False, "error": "Name or email required"}
+        address = (data.get("address") or "").strip()
+        if not name and not emails and not address:
+            return {"success": False, "error": "Name, email, or address required"}
        if not name and emails:
            name = emails[0].split("@")[0]
-        ok = _update_contact(uid, name, emails, phones)
+        ok = _update_contact(uid, name, emails, phones, address)
        return {"success": ok}

    @router.delete("/{uid}")
@@ -1,16 +1,21 @@
 """cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
 Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""

+import json
 import logging
 import ntpath
 import os
 import posixpath
 import re
 import shlex
+from pathlib import Path

 from fastapi import HTTPException
 from pydantic import BaseModel

+from routes._validators import validate_remote_host, validate_ssh_port
+from core.platform_compat import _ssh_exec_argv
+
 logger = logging.getLogger(__name__)


@@ -28,20 +33,24 @@ _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
 _OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
 # Include pattern is a glob: allow typical safe glyphs only.
 _INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
-# Remote host: user@host (optionally with :port-free hostname parts).
-_REMOTE_HOST_RE = re.compile(r"^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+$")
 # HF tokens and API tokens are url-safe base64-like.
 _TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
 # Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
 # Anything beyond plain alphanumerics + dash + underscore could break out
 # of the shell/PowerShell contexts the value lands in.
 _SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
-_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
 _GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
 # A download target directory. Absolute or ~-relative path; safe path glyphs
-# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
-# command. A leading ~ is expanded to $HOME at command-build time.
-_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
+# only (no quotes or shell metacharacters). Spaces are allowed because command
+# builders pass the value through quoted shell/Python contexts. The character
+# class uses ``\w`` — Unicode word characters under Python 3's default str
+# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
+# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
+# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
+# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
+# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
+_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
+_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
 _WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")


@@ -75,14 +84,6 @@ def _validate_include(v: str | None) -> str | None:
    return v


-def _validate_remote_host(v: str | None) -> str | None:
-    if v is None or v == "":
-        return None
-    if not _REMOTE_HOST_RE.match(v):
-        raise HTTPException(400, "Invalid remote_host — must be user@host, no SSH option syntax")
-    return v
-
-
 def _validate_token(v: str | None) -> str | None:
    if v is None or v == "":
        return None
@@ -91,26 +92,43 @@ def _validate_token(v: str | None) -> str | None:
    return v


+def load_stored_hf_token(*, state_path: Path | str | None = None) -> str:
+    """Return the decrypted HF token from cookbook_state.json, else the stripped HF_TOKEN / HUGGING_FACE_HUB_TOKEN env value."""
+    path = Path(state_path) if state_path else Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
+    token = ""
+    if path.exists():
+        try:
+            state = json.loads(path.read_text(encoding="utf-8"))
+            env = state.get("env") if isinstance(state, dict) else {}
+            if isinstance(env, dict) and env.get("hfToken"):
+                from src.secret_storage import decrypt
+                token = decrypt(env.get("hfToken") or "")
+        except Exception:
+            logger.warning("Could not load stored HF token; falling back to environment", exc_info=True)
+    if not token:
+        token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or "").strip()
+    return token
+
+
 def _validate_local_dir(v: str | None) -> str | None:
     if v is None or v == "":
         return None
+    if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
+        v = v[1:-1]
     v = v.rstrip("/") or "/"
-    if not _LOCAL_DIR_RE.match(v):
-        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
+    # fullmatch, not match: "$" also matches just before a trailing newline, so
+    # match() would let a value like "/models\n" through into command builders.
+    if not (_LOCAL_DIR_RE.fullmatch(v) or _WINDOWS_LOCAL_DIR_RE.fullmatch(v)):
+        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
+    # Reject path segments that start with '-' (option injection). '-' is in the
+    # allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
+    # as a CLI flag by hf/etc., and quoting does NOT stop a value from being
+    # parsed as an option, so the guard lives here inside the validator.
+    if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
+        raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
     return v


-def _validate_ssh_port(v: str | None) -> str | None:
-    if v is None or v == "":
-        return None
-    if not _SSH_PORT_RE.fullmatch(str(v)):
-        raise HTTPException(400, "Invalid ssh_port")
-    port = int(v)
-    if port < 1 or port > 65535:
-        raise HTTPException(400, "Invalid ssh_port")
-    return str(port)
-
-
 def _validate_gpus(v: str | None) -> str | None:
    if v is None or v == "":
        return None
@@ -122,7 +140,7 @@ def _validate_gpus(v: str | None) -> str | None:
 def _shell_path(p: str) -> str:
    """Render a validated path for a double-quoted shell context, expanding a
    leading ~ to $HOME (single quotes wouldn't expand it). Safe because
-    _validate_local_dir already restricts the charset."""
+    _validate_local_dir already rejects quotes and shell metacharacters."""
    if p == "~":
        return '"$HOME"'
    if p.startswith("~/"):
@@ -195,6 +213,20 @@ def _pip_install_attempt(pip_cmd: str) -> str:
    )


+def _pip_command(python_cmd: str) -> str:
+    """Return a pip command for a pip executable or any python / pythonX.Y[.exe] interpreter path."""
+    cmd = python_cmd.strip()
+    if " -m pip" in cmd or cmd in {"pip", "pip3"}:
+        return python_cmd
+    if re.fullmatch(r"python(?:\d+(?:\.\d+)*)?(?:\.exe)?", re.split(r"[\\/]", cmd)[-1], re.IGNORECASE):
+        return f"{python_cmd} -m pip"
+    return python_cmd
+
+
+def _pip_break_system_packages_check(pip_cmd: str) -> str:
+    return f"{pip_cmd} install --help 2>/dev/null | grep -q -- --break-system-packages"
+
+
 def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
    """Build a bash pip install fallback chain that surfaces errors.

@@ -206,33 +238,44 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p
    exit code is preserved (no ``| tail`` masking) and the last 5 lines of
    pip output appear in the Cookbook log on failure.
    """
+    from core.platform_compat import IS_WINDOWS
    upgrade_flag = " -U" if upgrade else ""
    # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]``
    # contains brackets that bash would treat as a glob, so it must be quoted
    # before being embedded in the install command. Plain names (e.g.
    # ``huggingface_hub``) are returned unchanged by ``shlex.quote``.
    pkg = shlex.quote(package)
-    base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}")
-    user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}")
+    # llama-cpp-python source builds are brittle on older distro pip/packaging
+    # stacks (common on WSL images). Prefer the prebuilt wheel index whenever
+    # this package is requested so dependency-install tasks are reliable.
+    if "llama-cpp-python" in package:
+        pkg += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu"
+
+    pip_cmd = _pip_command(python_cmd)
+    base = _pip_install_attempt(f"{pip_cmd} install -q{upgrade_flag} {pkg}")
+    user = _pip_install_attempt(f"{pip_cmd} install --user -q{upgrade_flag} {pkg}")
+    user_break_system = _pip_install_attempt(f"{pip_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}")
+    user_fallback = f"( {user} || {{ {_pip_break_system_packages_check(pip_cmd)} && {user_break_system}; }} )"
    # Derive the python executable for the venv detection check.
    # Must use the same interpreter that pip belongs to; hardcoding
    # python3 breaks when pip lives in a venv that only has "python".
-    if " -m pip" in python_cmd:
-        python_exe = python_cmd.replace(" -m pip", "")
-    elif python_cmd.strip() == "pip":
+    if " -m pip" in pip_cmd:
+        python_exe = pip_cmd.replace(" -m pip", "")
+    elif pip_cmd.strip() == "pip":
        python_exe = "python"
-    elif python_cmd.strip() == "pip3":
+    elif pip_cmd.strip() == "pip3":
        python_exe = "python3"
    else:
        python_exe = "python3"
    venv_check = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'
-    # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv → `&&` tries
-    # --user.  When IN a venv `! venv_check` fails → `&&` skips --user and the
+    # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv -> `&&` tries
+    # --user. When IN a venv `! venv_check` fails -> `&&` skips --user and the
    # group exits non-zero, propagating the base-install failure instead of
    # masking it as success (the `|| { venv_check || … }` shape from #903
    # swallowed the exit code because venv_check's exit-0 became the group's
-    # result).
-    return f"{base} || {{ ! {venv_check} && {user}; }}"
+    # result). `--break-system-packages` is only attempted when the active pip
+    # supports it; older pip versions abort with "no such option" otherwise.
+    return f"{base} || {{ ! {venv_check} && {user_fallback}; }}"


 def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str:
@@ -263,6 +306,55 @@ def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) ->
    return shlex.join(stripped)


+def _pip_install_command_without_break_system_packages(cmd: str) -> str:
+    """Return ``cmd`` re-joined without ``--break-system-packages``; unparseable input comes back as-is."""
+    try:
+        tokens = shlex.split(cmd)
+    except ValueError:
+        return cmd
+    return shlex.join(tok for tok in tokens if tok != "--break-system-packages")
+
+
+def _pip_install_help_check_from_cmd(cmd: str) -> str | None:
+    """Build a shell test for whether the pip invoked by ``cmd`` lists --break-system-packages.
+    Returns None when ``cmd`` cannot be tokenized or nothing precedes its first ``install`` token.
+    """
+    try:
+        tokens = shlex.split(cmd)
+    except ValueError:
+        return None
+    cut = tokens.index("install") if "install" in tokens else 0
+    if cut <= 0:
+        return None
+    probe = shlex.join([*tokens[:cut], "install", "--help"])
+    return f"{probe} 2>/dev/null | grep -q -- --break-system-packages"
+
+
+def _append_pip_install_runner_lines(runner_lines: list[str], cmd: str) -> None:
+    """Append ``cmd`` to ``runner_lines``, guarding ``--break-system-packages`` support.
+
+    Older pip aborts with "no such option" on that flag, so a command using it
+    is emitted as a shell if/else decided at runner time: run it as-is when
+    ``pip install --help`` lists the flag, else echo a notice and run a copy
+    without it. Commands that lack the flag or cannot be rewritten go in as-is.
+    """
+    guard = fallback = None
+    if "--break-system-packages" in (cmd or ""):
+        guard = _pip_install_help_check_from_cmd(cmd)
+        fallback = _pip_install_command_without_break_system_packages(cmd)
+    if not guard or fallback == cmd:
+        runner_lines.append(cmd)
+        return
+    runner_lines.extend([
+        f"if {guard}; then",
+        f"  {cmd}",
+        "else",
+        '  echo "[odysseus] pip does not support --break-system-packages; installing without it."',
+        f"  {fallback}",
+        "fi",
+    ])
+
+
 def _user_shell_path_bootstrap() -> list[str]:
    return [
        'ODYSSEUS_USER_SHELL="${SHELL:-}"',
@@ -270,12 +362,20 @@ def _user_shell_path_bootstrap() -> list[str]:
        '  ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"',
        '  if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
        'fi',
-        'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
+        # Windows can expose python3 as a Microsoft Store App Execution Alias
+        # under WindowsApps. Git Bash sees that stub as present, but it exits
+        # before running Python. A Windows venv usually has python.exe, not
+        # python3.exe, so treat a missing or WindowsApps python3 as absent.
+        '_odys_py3="$(command -v python3 2>/dev/null || true)"',
+        'case "$_odys_py3" in ""|*[Ww]indows[Aa]pps*) python3() { python "$@"; } ;; esac',
+        'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
    ]


-def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
-    """Build the standalone Python scanner used by /api/model/cached."""
+def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache: str | None = None) -> str:
+    """Build the standalone Python scanner used by /api/model/cached.
+    Allows for an additional HuggingFace cache path to be scanned (i.e. Windows HF cache for local WSL envs.)
+    """
    lines = [
        "import json, os, re, shutil, subprocess, urllib.request",
        "models = []",
@@ -306,6 +406,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "    for root, dirs, fns in safe_walk(base):",
        "        for fn in sorted(fns):",
        "            if not fn.lower().endswith('.gguf'): continue",
+        "            if fn.startswith('._'): continue  # macOS AppleDouble sidecar, not a real GGUF",
        "            fp = os.path.join(root, fn)",
        "            try: size = os.path.getsize(fp)",
        "            except Exception: size = 0",
@@ -338,6 +439,15 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "                if f.is_file(): nf += 1; sz += f.stat().st_size",
        "                if f.name.endswith('.incomplete'): ic = True",
        "        snap = os.path.join(cache, d, 'snapshots')",
+        "        # Windows HF cache stores files directly in snapshots/; blobs/ may be empty.",
+        "        # Fallback: scan snapshots for real files when blobs yielded nothing.",
+        "        if sz == 0 and os.path.isdir(snap):",
+        "            for sd in os.listdir(snap):",
+        "                sf = os.path.join(snap, sd)",
+        "                if not os.path.isdir(sf): continue",
+        "                for f in os.scandir(sf):",
+        "                    if f.is_file(): nf += 1; sz += f.stat().st_size",
+        "                    if f.name.endswith('.incomplete'): ic = True",
        "        is_diffusion = False; gguf_files = []",
        "        if os.path.isdir(snap):",
        "            for sd in os.listdir(snap):",
@@ -346,6 +456,21 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "                if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
        "                for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
        "        models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
+        "def hf_cache_paths():",
+        "    candidates = []",
+        "    def add(p):",
+        "        if not p: return",
+        "        p = os.path.expanduser(p)",
+        "        if p not in candidates: candidates.append(p)",
+        "    add(os.environ.get('HUGGINGFACE_HUB_CACHE'))",
+        "    hf_home = os.environ.get('HF_HOME')",
+        "    if hf_home: add(os.path.join(hf_home, 'hub'))",
+        "    add('~/.cache/huggingface/hub')",
+        "    # Docker images mount ./data/huggingface at /app/.cache/huggingface.",
+        "    # When HOME is /root, expanduser() misses that persisted cache.",
+        "    add('/app/.cache/huggingface/hub')",
+        f"    add({add_hf_cache!r})" if add_hf_cache else "",
+        "    return candidates",
        "def scan_dir(p):",
        "    if not os.path.isdir(p) or not safe_path(p): return",
        "    for d in sorted(os.listdir(p)):",
@@ -409,7 +534,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "            seen.add(name)",
        "            models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        "        return",
-        "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
+        "for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)",
        "scan_ollama()",
        "scan_ollama_api()",
    ]
@@ -453,6 +578,36 @@ _GGUF_PRELUDE_RE = re.compile(
 _OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
 _OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
 _OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
+# Lower-case llama.cpp KV-cache type names -> numeric type ids, kept as strings
+# because _normalize_llama_cpp_python_cache_types() splices them straight back
+# into the command text.
+# NOTE(review): the numbers are presumably ggml's `enum ggml_type` values --
+# confirm against ggml.h before adding entries. Names that are not listed here
+# (for example bf16) are left untouched in the command.
+_LLAMA_CPP_PYTHON_GGML_TYPES = {
+    "f32": "0",
+    "f16": "1",
+    "q4_0": "2",
+    "q4_1": "3",
+    "q5_0": "6",
+    "q5_1": "7",
+    "q8_0": "8",
+    "q8_1": "9",
+    "q2_k": "10",
+    "q3_k": "11",
+    "q4_k": "12",
+    "q5_k": "13",
+    "q6_k": "14",
+    "q8_k": "15",
+    "iq2_xxs": "16",
+    "iq2_xs": "17",
+    "iq3_xxs": "18",
+    "iq1_s": "19",
+    "iq4_nl": "20",
+    "iq3_s": "21",
+    "iq2_s": "22",
+    "iq4_xs": "23",
+    "mxfp4": "39",
+    "nvfp4": "40",
+    "q1_0": "41",
+}
+# Matches `--type_k <value>` / `--type_v=<value>`; the value may be wrapped in
+# a matching pair of single or double quotes. The named groups keep the flag,
+# separator and quote style so a substitution can rebuild the original text
+# around a replaced value.
+_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile(
+    r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)"
+)


 def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
@@ -484,6 +639,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -
    return f"[{host}]" if bracketed_host else host, port


+def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None:
+    """Rewrite named ``--type_k`` / ``--type_v`` values as numeric type ids.
+
+    Only commands that mention ``llama_cpp.server`` are touched; anything else
+    (including ``None`` and the empty string) is returned as-is. Values with no
+    entry in ``_LLAMA_CPP_PYTHON_GGML_TYPES`` are left exactly as written, and
+    the original separator and quoting around each value are preserved.
+    """
+    if not (cmd and "llama_cpp.server" in cmd):
+        return cmd
+
+    def _swap_type_name(found: re.Match[str]) -> str:
+        # Lookup is case-insensitive: the table only holds lower-case names.
+        enum_number = _LLAMA_CPP_PYTHON_GGML_TYPES.get(found["value"].lower())
+        if not enum_number:
+            return found[0]
+        quote_char = found["quote"]
+        return f"{found['flag']}{found['sep']}{quote_char}{enum_number}{quote_char}"
+
+    return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(_swap_type_name, cmd)
+
+
 def _check_serve_binary(seg: str) -> None:
    """Validate that a single command segment starts with an allowlisted binary
    (after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
@@ -525,6 +696,7 @@ def _validate_serve_cmd(v: str | None) -> str | None:
    # Backticks and raw newlines are never legitimate here.
    if any(c in v for c in ("`", "\n", "\r")):
        raise HTTPException(400, "Invalid characters in cmd")
+
    # Known GGUF launcher prelude → validate the serve invocation(s) it guards.
    m = _GGUF_PRELUDE_RE.match(v)
    if m:
@@ -533,9 +705,19 @@ def _validate_serve_cmd(v: str | None) -> str | None:
        for part in rest.split("||"):
            _check_serve_binary(part.strip())
        return v
+
    # Otherwise: a single invocation — no shell metacharacters allowed.
+    # Temporarily replace safe $(printf %s ...) expressions with a placeholder
+    # to avoid triggering the metacharacter/command-injection checks.
+    cleaned_v = v
+    printf_matches = list(re.finditer(r"\$\(\s*printf\s+%s\s+([^\n()]*?)\)", v))
+    for match in printf_matches:
+        inner = match.group(1)
+        if not any(c in inner for c in (";", "&&", "||", "$(", "`")):
+            cleaned_v = cleaned_v.replace(match.group(0), "/placeholder/safe/path.gguf")
+
    # (`$(` was the original intent; bare `$` is fine for shell-safe paths.)
-    if any(c in v for c in (";", "&&", "||", "$(")):
+    if any(c in cleaned_v for c in (";", "&&", "||", "$(")):
        raise HTTPException(400, "Invalid characters in cmd")
    _check_serve_binary(v)
    return v
@@ -546,12 +728,34 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op
    runner_lines.append('if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
    runner_lines.append('  echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="')
    if keep_shell_open:
+        # Decouple the post-crash interactive shell from the persistent log
+        # file. fds 3/4 were saved BEFORE the tee redirect at the top of
+        # the runner; restoring them here means the neofetch banner the
+        # user's .zshrc prints lands on the tmux pane only, not in the
+        # log file the agent's tail_serve_output reads.
+        runner_lines.append('  exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true')
+        runner_lines.append('  sleep 0.2  # let tee child flush + exit')
        runner_lines.append('  exec "${SHELL:-/bin/bash}"')
    else:
        runner_lines.append('  exit "$ODYSSEUS_PREFLIGHT_EXIT"')
    runner_lines.append('fi')


+def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None:
+    """Extend *runner_lines* with the shell preflight that locates the vLLM CLI.
+
+    The emitted script prepends ``$HOME/.local/bin`` to PATH, resolves ``vllm``
+    with ``command -v``, and then either sets ``ODYSSEUS_PREFLIGHT_EXIT=127``
+    with an error message (CLI missing) or echoes the CLI path and, when it is
+    non-empty, the first line of ``vllm --version``.
+    """
+    runner_lines.extend(
+        (
+            # The user install bin stays on PATH for Odysseus-managed
+            # `pip install --user` installs; the resolved CLI path is echoed
+            # afterwards so an external runtime is easy to spot.
+            'export PATH="$HOME/.local/bin:$PATH"',
+            'ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"',
+            'if [ -z "$ODYSSEUS_VLLM_BIN" ]; then',
+            '  echo "ERROR: vLLM is not installed."',
+            '  ODYSSEUS_PREFLIGHT_EXIT=127',
+            'else',
+            '  echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"',
+            '  ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"',
+            '  if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi',
+            'fi',
+        )
+    )
+
 def _append_serve_exit_code_lines(
    runner_lines: list[str],
    *,
@@ -563,7 +767,11 @@ def _append_serve_exit_code_lines(
    if is_pip_install:
        runner_lines.append('if [ $ODYSSEUS_CMD_EXIT -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; fi')
    if keep_shell_open:
-        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
+        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
+        # See preflight branch above for the rationale on restoring fds 3/4.
+        runner_lines.append('exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true')
+        runner_lines.append('sleep 0.2  # let tee child flush + exit')
+        runner_lines.append('exec "${SHELL:-/bin/bash}"')
    else:
        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
        runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"')
@@ -585,6 +793,7 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    runner_lines.append('    done')
    # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
    # or HIP attempt) doesn't cause the next configure to reuse stale settings.
+    runner_lines.append('    mkdir -p ~/bin')
    runner_lines.append('    cd ~/llama.cpp && rm -rf build')
    runner_lines.append('    if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
    runner_lines.append('      if command -v hipconfig &>/dev/null; then')
@@ -647,6 +856,7 @@ def _llama_cpp_rebuild_cmd() -> str:

 class ModelDownloadRequest(BaseModel):
    repo_id: str
+    backend: str | None = None  # "hf" (default) or "ollama"
    include: str | None = None  # glob pattern e.g. "*Q4_K_M*"
    hf_token: str | None = None
    env_prefix: str | None = None  # e.g. "source ~/venv/bin/activate"
@@ -793,3 +1003,182 @@ def _ssh_ps(host, script_path, port=None):

 # Windows session dir — stored in user's temp on the remote
 WIN_SESSION_DIR = "$env:TEMP\\\\odysseus-sessions"
+
+
+def _diagnose_serve_output(text: str) -> dict | None:
+    """Server-side mirror of the Cookbook UI's common serve diagnoses.
+
+    The browser uses cookbook-diagnosis.js for clickable fixes. This gives
+    the agent/tool path the same structured signal so it can retry with an
+    adjusted command instead of guessing from raw tmux output.
+
+    Only the last 6000 characters of *text* are scanned. Rules are tried in
+    table order with case-insensitive matching and the first hit wins, so more
+    specific rules must stay above more general ones. When no rule matches, a
+    generic result is returned if a Python traceback is present and none of
+    the "server is up" markers are.
+
+    Returns:
+        ``{"message": str, "suggestions": list[dict]}`` for the first matching
+        rule, or ``None`` when *text* is empty or nothing matches.
+    """
+    if not text:
+        return None
+    tail = text[-6000:]
+    # Each rule is (regex, human-readable message, suggested fixes).
+    patterns = [
+        (
+            r"No available memory for the cache blocks|Available KV cache memory:.*-",
+            "No GPU memory left for KV cache after loading model.",
+            [
+                {"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"},
+                {"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"},
+            ],
+        ),
+        (
+            r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization",
+            "GPU ran out of memory during startup or warmup.",
+            [
+                {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"},
+                {"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"},
+                {"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"},
+            ],
+        ),
+        (
+            r"not divisib|must be divisible|attention heads.*divisible",
+            "Tensor parallel size is incompatible with the model.",
+            [
+                {"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"},
+                {"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"},
+            ],
+        ),
+        (
+            r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context",
+            "Context length is too large for available GPU memory.",
+            [
+                {"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"},
+                {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"},
+            ],
+        ),
+        (
+            r"enable-auto-tool-choice requires --tool-call-parser",
+            "Auto tool choice requires an explicit tool call parser.",
+            [{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}],
+        ),
+        (
+            r"Please pass.*trust.remote.code=True|contains custom code which must be executed to correctly load|does not recognize this architecture|model type.*but Transformers does not",
+            "Model requires custom code or newer model support.",
+            [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
+        ),
+        (
+            r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2",
+            "vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.",
+            [
+                {
+                    "label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint",
+                    "op": "manual",
+                }
+            ],
+        ),
+        (
+            r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
+            "vLLM/Transformers kernel package mismatch.",
+            [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}],
+        ),
+        (
+            r"Address already in use|bind.*address.*in use",
+            "Port is already in use.",
+            [{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}],
+        ),
+        (
+            r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid",
+            "No GPUs are visible to the serve process.",
+            [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}],
+        ),
+        (
+            r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available",
+            "vLLM could not find a supported GPU (CUDA or ROCm). "
+            "This machine may have integrated or unsupported graphics only.",
+            [
+                {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+                {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+            ],
+        ),
+        (
+            # A failed `import vllm` is reported as "No module named 'vllm'"
+            # (quoted), while `python -m vllm` prints the name unquoted, so the
+            # quote is optional. NOTE(review): keep cookbook-diagnosis.js in
+            # sync if it only knows the unquoted form.
+            r"vllm.*command not found|No module named '?vllm|ERROR: vLLM is not installed",
+            "vLLM is not installed or not in PATH on this server.",
+            [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
+        ),
+        (
+            r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
+            r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
+            r"Please ensure sgl_kernel is properly installed",
+            "SGLang native dependencies are missing on this server.",
+            [
+                {"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
+                {"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
+            ],
+        ),
+        (
+            # Optional quote for the same reason as the vLLM rule above.
+            r"sglang.*command not found|No module named '?sglang|SGLang is not installed",
+            "SGLang is not installed or not in PATH on this server.",
+            [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}],
+        ),
+        (
+            r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found",
+            "llama.cpp / llama-cpp-python dependencies are missing.",
+            [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
+        ),
+        (
+            r"No GGUF found on this host|no \.gguf file|No GGUF file found",
+            "No GGUF file found for this model on this host. The llama.cpp backend needs a .gguf file.",
+            [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}],
+        ),
+        (
+            r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers",
+            "Diffusion serving requires PyTorch and diffusers.",
+            [{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}],
+        ),
+        (
+            r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review",
+            "Model access is gated or unauthorized.",
+            [{"label": "set HF token and request model access on HuggingFace", "op": "manual"}],
+        ),
+    ]
+    for pattern, message, suggestions in patterns:
+        if re.search(pattern, tail, re.I):
+            return {"message": message, "suggestions": suggestions}
+    # Fallback: a traceback with no sign that the server actually came up.
+    if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search(
+        r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I
+    ):
+        return {
+            "message": "Python traceback detected during serve startup.",
+            "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}],
+        }
+    return None
+
+
+async def run_ssh_command_async(
+    remote: str,
+    ssh_port: str | None,
+    remote_cmd: str,
+    *,
+    timeout: float,
+    connect_timeout: int | None = None,
+    strict_host_key_checking: bool | None = None,
+    stdin_data: bytes | None = None,
+) -> tuple[int, bytes, bytes]:
+    """Run an ssh command with centralized timeout and stderr/stdout capture.
+    Async version of core.platform_compat.run_ssh_command_sync.
+
+    The argv is built by ``_ssh_exec_argv`` from *remote*, *ssh_port*,
+    *remote_cmd*, *connect_timeout* and *strict_host_key_checking*. A stdin
+    pipe is only opened when *stdin_data* is not ``None``.
+
+    Returns:
+        ``(returncode, stdout, stderr)``; a missing return code is reported
+        as ``0``.
+
+    Raises:
+        asyncio.TimeoutError: the command did not finish within *timeout*
+            seconds. The ssh process is killed and reaped before re-raising.
+    """
+    import asyncio
+    import contextlib
+    proc = await asyncio.create_subprocess_exec(
+        *_ssh_exec_argv(
+            remote,
+            ssh_port,
+            remote_cmd=remote_cmd,
+            connect_timeout=connect_timeout,
+            strict_host_key_checking=strict_host_key_checking,
+        ),
+        stdin=asyncio.subprocess.PIPE if stdin_data is not None else None,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+    )
+    try:
+        stdout, stderr = await asyncio.wait_for(
+            proc.communicate(input=stdin_data), timeout=timeout
+        )
+    except asyncio.TimeoutError:
+        # ssh may exit on its own between the timeout firing and kill(); in
+        # that case kill() raises ProcessLookupError, which must not replace
+        # the TimeoutError callers are waiting for.
+        with contextlib.suppress(ProcessLookupError):
+            proc.kill()
+        # Reap the child so it does not linger as a zombie.
+        await proc.communicate()
+        raise
+    return proc.returncode or 0, stdout, stderr
@@ -0,0 +1,75 @@
+"""Pure helpers for shaping cookbook task output for the status response.
+
+Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
+unit-tested without standing up the whole app.
+"""
+
+import re
+
+# Matches "Fetching 0 files" (case-insensitive, any run of whitespace between
+# the words); classify_dead_download() treats a hit as "nothing was fetched".
+_FETCHING_ZERO_FILES_RE = re.compile(r"Fetching\s+0\s+files", re.IGNORECASE)
+
+# Probe scripts for the dead-session download check, run as
+# `python3 -c <PROBE> <repo_id> <cache_root>` (locally or over SSH).
+# cache_root is the task's custom download dir, '' for the default HF cache.
+# It has to be passed explicitly: the download runner exports
+# HF_HOME=<local_dir>, so that task's cache lives under <local_dir>/hub, and
+# the probe process's own environment knows nothing about it.
+#
+# Both probes resolve the repo directory the same way: <cache_root>/hub when a
+# cache_root is given, otherwise $HUGGINGFACE_HUB_CACHE, otherwise
+# $HF_HOME/hub, otherwise ~/.cache/huggingface/hub; the repo lives in
+# `models--<repo_id with '/' replaced by '--'>` below that.
+
+# Exit code 0 only when snapshots/ holds at least one non-empty snapshot
+# directory AND blobs/ contains no `*.incomplete` file; exit code 1 otherwise.
+# NOTE: the leading `inc=False;` is immediately overwritten by the later
+# `inc=` assignment and has no effect.
+HF_CACHE_COMPLETE_PROBE = (
+    "import os,sys;"
+    "repo=sys.argv[1];"
+    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
+    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
+    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
+    "snap=os.path.join(d,'snapshots');"
+    "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
+    "inc=False;"
+    "blobs=os.path.join(d,'blobs');"
+    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
+    "sys.exit(0 if ok and not inc else 1)"
+)
+
+# Exit code 0 when blobs/ exists and contains at least one `*.incomplete`
+# file; exit code 1 otherwise.
+HF_CACHE_INCOMPLETE_PROBE = (
+    "import os,sys;"
+    "repo=sys.argv[1];"
+    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
+    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
+    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
+    "blobs=os.path.join(d,'blobs');"
+    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
+    "sys.exit(0 if inc else 1)"
+)
+
+
+def classify_dead_download(full_snapshot: str):
+    """Derive the final status of a dead download session from runner markers.
+
+    The runner emits DOWNLOAD_OK only after a zero exit (DOWNLOAD_FAILED
+    otherwise), so these markers remain reliable once the tmux pane is gone.
+
+    Returns:
+        ``("completed", False)`` when DOWNLOAD_OK is present;
+        ``("error", True)`` when DOWNLOAD_OK is present but the log also shows
+        "Fetching 0 files" (nothing matched the include/quant pattern);
+        ``("error", False)`` when only DOWNLOAD_FAILED is present;
+        ``None`` when the snapshot is empty or carries no marker, in which
+        case the caller has to fall back to the cache probe.
+
+    DOWNLOAD_OK takes precedence over DOWNLOAD_FAILED, matching the
+    live-session branch.
+    """
+    if full_snapshot:
+        if "DOWNLOAD_OK" in full_snapshot:
+            fetched_nothing = _FETCHING_ZERO_FILES_RE.search(full_snapshot) is not None
+            return ("error", True) if fetched_nothing else ("completed", False)
+        if "DOWNLOAD_FAILED" in full_snapshot:
+            return ("error", False)
+    return None
+
+
+def error_aware_output_tail(full_snapshot: str, status: str) -> str:
+    """Pick the trailing lines of a task log to ship in the status response.
+
+    A task whose *status* is ``"error"`` gets its last 50 lines, so the
+    "Copy last 50 lines" action carries the real failure context (stack
+    traces, build output). Every other status gets the last 12 lines to keep
+    the payload small on the 10s polling interval. An empty snapshot yields
+    an empty string.
+    """
+    if not full_snapshot:
+        return ""
+    keep = 12
+    if status == "error":
+        keep = 50
+    log_lines = full_snapshot.splitlines()
+    return "\n".join(log_lines[-keep:])
@@ -20,39 +20,26 @@ All routes are admin-gated (endpoint/provider management is an admin action).
 """

 import json
-import time
 import uuid
 import logging
-import threading
 from typing import Dict, Optional

 import httpx
-from fastapi import APIRouter, Request, Form, HTTPException
+from fastapi import HTTPException, Request

 from core.database import SessionLocal, ModelEndpoint
-from core.middleware import require_admin
+from routes.device_flow import (
+    DeviceFlowPoll,
+    DeviceFlowStart,
+    PendingDeviceFlowStore,
+    create_device_flow_router,
+)
 from src.auth_helpers import get_current_user
 from src import copilot

 logger = logging.getLogger(__name__)

-# Pending device-flow logins, keyed by an opaque poll_id. The device_code is a
-# bearer-like secret, so it lives here (server memory) rather than in the
-# browser. Entries expire with the GitHub device code.
-#
-# NOTE: this is per-process state. The device flow assumes a single worker
-# (Odysseus' default): with multiple uvicorn workers, the poll request can land
-# on a worker that never saw the start, returning "Unknown or expired login
-# session". Move this to a shared store (DB/Redis) if running multi-worker.
-_PENDING: Dict[str, Dict] = {}
-_PENDING_LOCK = threading.Lock()
-
-
-def _prune_expired() -> None:
-    now = time.time()
-    with _PENDING_LOCK:
-        for k in [k for k, v in _PENDING.items() if v.get("expires_at", 0) < now]:
-            _PENDING.pop(k, None)
+_DEVICE_FLOW_STORE = PendingDeviceFlowStore()


 def _provision_endpoint(token: str, base: str, owner: Optional[str]) -> Dict:
@@ -112,112 +99,75 @@ def _provision_endpoint(token: str, base: str, owner: Optional[str]) -> Dict:
    return result


-def setup_copilot_routes() -> APIRouter:
-    router = APIRouter(prefix="/api/copilot", tags=["copilot"])
+def _start_device_flow(request: Request, form) -> DeviceFlowStart:
+    host = copilot.GITHUB_HOST
+    ent = str(form.get("enterprise_url") or "").strip()
+    if ent:
+        host = copilot.normalize_domain(ent)
+    try:
+        data = copilot.request_device_code(host)
+    except httpx.HTTPStatusError as e:
+        status = e.response.status_code if e.response is not None else "unknown"
+        raise HTTPException(502, f"GitHub device-code request failed (HTTP {status})")
+    except Exception as e:
+        raise HTTPException(502, f"GitHub device-code request failed: {e}")

-    @router.post("/device/start")
-    def device_start(request: Request, enterprise_url: str = Form("")):
-        require_admin(request)
-        _prune_expired()
-        host = copilot.GITHUB_HOST
-        ent = (enterprise_url or "").strip()
-        if ent:
-            host = copilot.normalize_domain(ent)
-        try:
-            data = copilot.request_device_code(host)
-        except httpx.HTTPStatusError as e:
-            status = e.response.status_code if e.response is not None else "unknown"
-            raise HTTPException(502, f"GitHub device-code request failed (HTTP {status})")
-        except Exception as e:
-            raise HTTPException(502, f"GitHub device-code request failed: {e}")
+    device_code = data.get("device_code")
+    if not device_code:
+        raise HTTPException(502, "GitHub did not return a device code")

-        device_code = data.get("device_code")
-        if not device_code:
-            raise HTTPException(502, "GitHub did not return a device code")
-        interval = int(data.get("interval") or 5)
-        expires_in = int(data.get("expires_in") or 900)
-        poll_id = uuid.uuid4().hex
-        with _PENDING_LOCK:
-            _PENDING[poll_id] = {
-                "device_code": device_code,
-                "host": host,
-                "enterprise_url": ent,
-                "interval": interval,
-                "owner": get_current_user(request) or None,
-                "expires_at": time.time() + expires_in,
-                "next_poll_at": 0.0,
-            }
-        # verification_uri_complete embeds the user code, so the browser tab we
-        # open lands the user straight on GitHub's "Authorize" screen with the
-        # code pre-filled — one click, no manual code entry.
-        return {
-            "poll_id": poll_id,
+    # verification_uri_complete embeds the user code, so the browser tab we
+    # open lands the user straight on GitHub's "Authorize" screen with the
+    # code pre-filled — one click, no manual code entry.
+    return DeviceFlowStart(
+        pending={
+            "device_code": device_code,
+            "host": host,
+            "enterprise_url": ent,
+            "owner": get_current_user(request) or None,
+        },
+        response={
            "user_code": data.get("user_code"),
            "verification_uri": data.get("verification_uri"),
            "verification_uri_complete": data.get("verification_uri_complete"),
-            "interval": interval,
-            "expires_in": expires_in,
-        }
+        },
+        interval=int(data.get("interval") or 5),
+        expires_in=int(data.get("expires_in") or 900),
+    )

-    @router.post("/device/poll")
-    def device_poll(request: Request, poll_id: str = Form(...)):
-        require_admin(request)
-        _prune_expired()
-        with _PENDING_LOCK:
-            pending = _PENDING.get(poll_id)
-        if not pending:
-            raise HTTPException(404, "Unknown or expired login session")

-        # Enforce GitHub's polling interval server-side so a chatty client
-        # can't trip slow_down.
-        now = time.time()
-        if now < pending.get("next_poll_at", 0):
-            return {"status": "pending"}
+def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll:
+    try:
+        data = copilot.poll_access_token(pending["host"], pending["device_code"])
+    except Exception as e:
+        return DeviceFlowPoll.pending(f"poll error: {e}")

+    token = data.get("access_token")
+    if token:
+        base = copilot.enterprise_base(pending["enterprise_url"]) if pending["enterprise_url"] else copilot.COPILOT_BASE
        try:
-            data = copilot.poll_access_token(pending["host"], pending["device_code"])
+            result = _provision_endpoint(token, base, pending["owner"])
        except Exception as e:
-            return {"status": "pending", "detail": f"poll error: {e}"}
+            logger.exception("Copilot endpoint provisioning failed")
+            raise HTTPException(500, f"Login succeeded but provisioning failed: {e}")
+        return DeviceFlowPoll.authorized(result)

-        token = data.get("access_token")
-        if token:
-            base = copilot.enterprise_base(pending["enterprise_url"]) if pending["enterprise_url"] else copilot.COPILOT_BASE
-            try:
-                result = _provision_endpoint(token, base, pending["owner"])
-            except Exception as e:
-                logger.exception("Copilot endpoint provisioning failed")
-                with _PENDING_LOCK:
-                    _PENDING.pop(poll_id, None)
-                raise HTTPException(500, f"Login succeeded but provisioning failed: {e}")
-            with _PENDING_LOCK:
-                _PENDING.pop(poll_id, None)
-            return {"status": "authorized", "endpoint": result}
+    err = data.get("error")
+    if err == "authorization_pending":
+        return DeviceFlowPoll.pending()
+    if err == "slow_down":
+        return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None)
+    if err in ("expired_token", "access_denied"):
+        return DeviceFlowPoll.failed(err)
+    # Unknown error — surface but keep the session for another try.
+    return DeviceFlowPoll.pending(err or "unknown")

-        err = data.get("error")
-        if err == "authorization_pending":
-            with _PENDING_LOCK:
-                if poll_id in _PENDING:
-                    _PENDING[poll_id]["next_poll_at"] = now + pending["interval"]
-            return {"status": "pending"}
-        if err == "slow_down":
-            new_interval = int(data.get("interval") or (pending["interval"] + 5))
-            with _PENDING_LOCK:
-                if poll_id in _PENDING:
-                    _PENDING[poll_id]["interval"] = new_interval
-                    _PENDING[poll_id]["next_poll_at"] = now + new_interval
-            return {"status": "pending"}
-        if err in ("expired_token", "access_denied"):
-            with _PENDING_LOCK:
-                _PENDING.pop(poll_id, None)
-            return {"status": "failed", "error": err}
-        # Unknown error — surface but keep the session for another try.
-        return {"status": "pending", "detail": err or "unknown"}

-    @router.post("/device/cancel")
-    def device_cancel(request: Request, poll_id: str = Form(...)):
-        require_admin(request)
-        with _PENDING_LOCK:
-            _PENDING.pop(poll_id, None)
-        return {"status": "cancelled"}
-
-    return router
+def setup_copilot_routes():
+    """Build the Copilot device-flow router.
+
+    Delegates to ``create_device_flow_router`` with the ``/api/copilot``
+    prefix, the module-level pending-login store, and the Copilot-specific
+    start and poll callbacks defined in this module.
+    """
+    router = create_device_flow_router(
+        store=_DEVICE_FLOW_STORE,
+        start_flow=_start_device_flow,
+        poll_flow=_poll_device_flow,
+        prefix="/api/copilot",
+        tags=["copilot"],
+    )
+    return router
@@ -0,0 +1,193 @@
+"""Shared OAuth/device-flow route scaffolding for provider setup."""
+
+from __future__ import annotations
+
+import inspect
+import threading
+import time
+import uuid
+from dataclasses import dataclass
+from typing import Any, Callable, Iterable, Mapping, Optional
+
+from fastapi import APIRouter, Form, HTTPException, Request
+
+from core.middleware import require_admin
+
+
+@dataclass(frozen=True)
+class DeviceFlowStart:
+    """Provider-specific start result consumed by the shared route wrapper."""
+
+    # Provider payload stored server-side in the pending store; a copy is
+    # handed back to the provider's poll callback on each poll.
+    pending: Mapping[str, Any]
+    # Fields the ``/device/start`` route returns to the caller (the route adds
+    # ``poll_id``, ``interval`` and ``expires_in`` on top).
+    response: Mapping[str, Any]
+    # Seconds between polls.
+    interval: int = 5
+    # Lifetime of the pending entry, in seconds.
+    expires_in: int = 900
+
+
+@dataclass(frozen=True)
+class DeviceFlowPoll:
+    """Normalized provider poll outcome.
+
+    The classmethod constructors below each fix ``status`` to one of
+    "pending", "slow_down", "authorized" or "failed".
+    """
+
+    status: str
+    # Populated by ``authorized()``: the endpoint mapping from the provider.
+    endpoint: Optional[Mapping[str, Any]] = None
+    # Populated by ``failed()``: the error code.
+    error: Optional[str] = None
+    # Optional note carried by ``pending()`` / ``slow_down()`` outcomes.
+    detail: Optional[str] = None
+    # Populated by ``slow_down()``: the requested poll interval, if given.
+    interval: Optional[int] = None
+
+    @classmethod
+    def pending(cls, detail: Optional[str] = None) -> "DeviceFlowPoll":
+        """Outcome for "not authorized yet"; ``detail`` is an optional note."""
+        return cls(status="pending", detail=detail)
+
+    @classmethod
+    def slow_down(cls, interval: Optional[int] = None, detail: Optional[str] = None) -> "DeviceFlowPoll":
+        """Outcome asking the caller to poll less often, optionally at ``interval``."""
+        return cls(status="slow_down", interval=interval, detail=detail)
+
+    @classmethod
+    def authorized(cls, endpoint: Mapping[str, Any]) -> "DeviceFlowPoll":
+        """Outcome for a completed login carrying the resulting ``endpoint``."""
+        return cls(status="authorized", endpoint=endpoint)
+
+    @classmethod
+    def failed(cls, error: str) -> "DeviceFlowPoll":
+        """Outcome for a terminal failure identified by ``error``."""
+        return cls(status="failed", error=error)
+
+
+class PendingDeviceFlowStore:
+    """In-memory registry of in-progress device-flow logins, guarded by a lock.
+
+    Entries never leave this process. Every entry keeps the provider's own
+    payload apart from the polling bookkeeping (interval, expiry, next allowed
+    poll time), so provider callbacks only ever see the fields they supplied.
+    """
+
+    def __init__(self, *, time_func: Callable[[], float] = time.time):
+        self._pending: dict[str, dict[str, Any]] = {}
+        self._lock = threading.Lock()
+        self._time = time_func
+
+    def _now(self) -> float:
+        """Current time from the injected clock, as a float."""
+        return float(self._time())
+
+    def prune_expired(self) -> None:
+        """Drop every entry whose ``expires_at`` lies before the current time."""
+        cutoff = self._now()
+        with self._lock:
+            stale = [
+                pid
+                for pid, record in self._pending.items()
+                if record.get("expires_at", 0) < cutoff
+            ]
+            for pid in stale:
+                self._pending.pop(pid, None)
+
+    def add(self, payload: Mapping[str, Any], *, interval: int, expires_in: int) -> str:
+        """Register a new pending flow and return its freshly generated poll id."""
+        self.prune_expired()
+        poll_id = uuid.uuid4().hex
+        with self._lock:
+            record: dict[str, Any] = {
+                "payload": dict(payload),
+                "interval": max(int(interval or 5), 1),
+                "expires_at": self._now() + max(int(expires_in or 900), 1),
+                "next_poll_at": 0.0,
+            }
+            self._pending[poll_id] = record
+        return poll_id
+
+    def get_payload(self, poll_id: str) -> Optional[dict[str, Any]]:
+        """Return a copy of the provider payload, or None if unknown/expired."""
+        self.prune_expired()
+        with self._lock:
+            record = self._pending.get(poll_id)
+            return None if record is None else dict(record.get("payload") or {})
+
+    def is_throttled(self, poll_id: str) -> bool:
+        """Tell whether the next allowed poll time for this flow is still ahead."""
+        with self._lock:
+            record = self._pending.get(poll_id)
+            if not record:
+                return False
+            return self._now() < float(record.get("next_poll_at") or 0)
+
+    def schedule_next(self, poll_id: str) -> None:
+        """Push the next allowed poll one interval into the future."""
+        stamp = self._now()
+        with self._lock:
+            record = self._pending.get(poll_id)
+            if record is None:
+                return
+            record["next_poll_at"] = stamp + int(record.get("interval") or 5)
+
+    def slow_down(self, poll_id: str, interval: Optional[int] = None) -> None:
+        """Adopt ``interval`` (or the current one plus 5) and reschedule the poll."""
+        stamp = self._now()
+        with self._lock:
+            record = self._pending.get(poll_id)
+            if record is None:
+                return
+            if interval:
+                requested = int(interval)
+            else:
+                requested = int(record.get("interval") or 5) + 5
+            record["interval"] = max(requested, 1)
+            record["next_poll_at"] = stamp + record["interval"]
+
+    def pop(self, poll_id: str) -> None:
+        """Forget a flow; unknown ids are ignored."""
+        with self._lock:
+            self._pending.pop(poll_id, None)
+
+
+async def _maybe_await(value: Any) -> Any:
+    """Resolve ``value`` if it is awaitable; otherwise hand it back untouched."""
+    return (await value) if inspect.isawaitable(value) else value
+
+
+def _pending_response(detail: Optional[str] = None) -> dict[str, Any]:
+    """Build the "pending" poll body, attaching ``detail`` only when non-empty."""
+    if not detail:
+        return {"status": "pending"}
+    return {"status": "pending", "detail": detail}
+
+
+def create_device_flow_router(
+    *,
+    prefix: str,
+    tags: Iterable[str],
+    store: PendingDeviceFlowStore,
+    start_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowStart],
+    poll_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowPoll],
+) -> APIRouter:
+    """Create standard `/device/start|poll|cancel` routes for a provider.
+
+    Args:
+        prefix: URL prefix for the router.
+        tags: OpenAPI tags attached to the router.
+        store: Pending-flow store shared by the three routes.
+        start_flow: Called with the request and its form data; returns a
+            ``DeviceFlowStart`` (directly or as an awaitable).
+        poll_flow: Called with the request and the stored provider payload;
+            returns a ``DeviceFlowPoll`` (directly or as an awaitable).
+
+    Returns:
+        The configured ``APIRouter``. All three routes call ``require_admin``
+        first.
+    """
+
+    router = APIRouter(prefix=prefix, tags=list(tags))
+
+    @router.post("/device/start")
+    async def device_start(request: Request):
+        require_admin(request)
+        form = await request.form()
+        # NOTE(review): a synchronous start_flow is invoked directly inside
+        # this coroutine; if it performs blocking I/O it will hold up the
+        # event loop for that long — confirm against the provider callbacks.
+        start = await _maybe_await(start_flow(request, form))
+        interval = int(start.interval or 5)
+        expires_in = int(start.expires_in or 900)
+        poll_id = store.add(start.pending, interval=interval, expires_in=expires_in)
+        # Only the provider's public response plus polling metadata goes back
+        # to the caller; start.pending stays in the store.
+        response = dict(start.response)
+        response.update({"poll_id": poll_id, "interval": interval, "expires_in": expires_in})
+        return response
+
+    @router.post("/device/poll")
+    async def device_poll(request: Request, poll_id: str = Form(...)):
+        require_admin(request)
+        payload = store.get_payload(poll_id)
+        if payload is None:
+            raise HTTPException(404, "Unknown or expired login session")
+        # Polling faster than the stored interval is answered locally without
+        # calling the provider callback.
+        if store.is_throttled(poll_id):
+            return {"status": "pending"}
+
+        try:
+            # NOTE(review): same event-loop caveat as start_flow above applies
+            # to a synchronous, blocking poll_flow — confirm.
+            outcome = await _maybe_await(poll_flow(request, payload))
+        except Exception:
+            # Any error from the provider callback ends the session before the
+            # exception propagates to the caller.
+            store.pop(poll_id)
+            raise
+
+        if outcome.status == "authorized":
+            store.pop(poll_id)
+            return {"status": "authorized", "endpoint": dict(outcome.endpoint or {})}
+        if outcome.status == "failed":
+            store.pop(poll_id)
+            return {"status": "failed", "error": outcome.error or "denied"}
+        if outcome.status == "slow_down":
+            # Reported to the client as plain "pending"; only the stored
+            # interval / next poll time change.
+            store.slow_down(poll_id, outcome.interval)
+            return _pending_response(outcome.detail)
+
+        # Every other status is treated as pending and keeps the session.
+        store.schedule_next(poll_id)
+        return _pending_response(outcome.detail)
+
+    @router.post("/device/cancel")
+    def device_cancel(request: Request, poll_id: str = Form(...)):
+        require_admin(request)
+        # Cancelling an unknown poll_id still reports "cancelled".
+        store.pop(poll_id)
+        return {"status": "cancelled"}
+
+    return router
@@ -1,12 +1,13 @@
 """Diagnostics routes — /api/db/stats, /api/rag/stats, /api/test/youtube, /api/test-research."""

 import logging
+import os
 from typing import Dict, Any

 from fastapi import APIRouter, HTTPException, Form, Request

 from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
-from core.constants import DEFAULT_HOST
+from core.constants import DEFAULT_HOST, DATA_DIR
 from core.middleware import require_admin

 logger = logging.getLogger(__name__)
@@ -16,9 +17,42 @@ def setup_diagnostics_routes(
    rag_manager,
    rag_available: bool,
    research_handler,
+    memory_vector=None,
 ) -> APIRouter:
    router = APIRouter(tags=["diagnostics"])

+    @router.get("/api/diagnostics/services")
+    async def get_service_health(request: Request) -> Dict[str, Any]:
+        """Consolidated degraded-state report for ChromaDB, SearXNG, email,
+        ntfy, and provider endpoints. Non-intrusive probes — safe to poll.
+
+        Admin only. The report itself is produced by
+        ``collect_service_health`` from the RAG manager and memory vector
+        store passed to ``setup_diagnostics_routes``.
+        """
+        require_admin(request)
+        # NOTE(review): imported at call time rather than module level —
+        # presumably to avoid an import cycle or startup cost; confirm.
+        from src.service_health import collect_service_health
+        return await collect_service_health(rag_manager, memory_vector)
+
+    @router.get("/api/diagnostics/logs")
+    async def get_diagnostics_logs(request: Request, limit: int = 200) -> Dict[str, Any]:
+        """Return the last ``limit`` lines of the application log (admin only).
+
+        ``limit`` is clamped to the range 1..1000. A missing log file yields an
+        empty list; any other failure is logged and reported as HTTP 500.
+        """
+        from collections import deque
+
+        require_admin(request)
+        limit = max(1, min(limit, 1000))
+        try:
+            log_file = os.path.join(DATA_DIR, "logs", "app.log")
+            # Stream the file through a bounded deque so only the requested
+            # tail is held in memory, not every line of the log.
+            with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
+                tail_lines = [line.rstrip('\r\n') for line in deque(f, maxlen=limit)]
+        except FileNotFoundError:
+            # Opening directly (rather than exists() followed by open())
+            # removes the window in which rotation can delete the file
+            # between the check and the read.
+            return {"status": "success", "logs": []}
+        except Exception as e:
+            logger.error("Diagnostics logs retrieval error", exc_info=e)
+            raise HTTPException(500, f"Failed to retrieve logs: {str(e)}")
+
+        return {
+            "status": "success",
+            "logs": tail_lines
+        }
+
    @router.get("/api/db/stats")
    async def get_database_stats(request: Request) -> Dict[str, Any]:
        require_admin(request)
@@ -7,14 +7,24 @@ from typing import Dict, Any, List, Optional

 from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Form

-from sqlalchemy import func
+from sqlalchemy import case, func, or_
 from core.database import SessionLocal, Document, DocumentVersion
 from core.database import Session as DbSession
 from src.auth_helpers import get_current_user
+from src.constants import MAIL_ATTACHMENTS_DIR

 logger = logging.getLogger(__name__)


+def _get_session_or_404(db, session_id: str, user: Optional[str]):
+    """Load a session by id, answering 404 for both "missing" and "not yours".
+
+    When ``user`` is falsy no ownership check is made. Otherwise the session's
+    ``owner`` must equal ``user``; any other owner value (including an unset
+    one) is reported as "Session not found" rather than as a permission error.
+    """
+    found = db.query(DbSession).filter(DbSession.id == session_id).first()
+    if found and not (user and found.owner != user):
+        return found
+    raise HTTPException(404, "Session not found")
+
+
 def _aggregate_language_facets(lang_rows):
    """Sum document counts per display language for the library facet.

@@ -30,6 +40,19 @@ def _aggregate_language_facets(lang_rows):
    return out


+def _library_language_for_document(doc: Document) -> str:
+    """Pick the language label the document library shows for ``doc``.
+
+    A document whose content carries a source-upload marker (as detected by
+    ``find_source_upload_id``) is labelled "pdf", hiding the markdown wrapper
+    it is stored in. Anything else uses its stored language, or "text" when
+    none is set.
+    """
+    from src.pdf_form_doc import find_source_upload_id
+
+    content = doc.current_content or ""
+    return "pdf" if find_source_upload_id(content) else (doc.language or "text")
+

 from routes.document_helpers import (
    DocumentCreate, DocumentUpdate, DocumentPatch,
@@ -69,17 +92,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            # the doc is owner-stamped, so it lives in the library on its own.
            session = None
            if req.session_id:
-                session = db.query(DbSession).filter(DbSession.id == req.session_id).first()
-                if not session:
-                    raise HTTPException(404, "Session not found")
                # Match the lenient ownership model the rest of the app uses
                # (see _owner_filter): only block when an AUTHENTICATED user is
                # writing into a DIFFERENT user's session. In single-user /
-                # unconfigured / localhost-bypass mode the middleware leaves
-                # current_user unset (None), and those sessions are already
-                # served freely everywhere else.
-                if user and session.owner and session.owner != user:
-                    raise HTTPException(403, "Cannot create document in another user's session")
+                # unconfigured / localhost-bypass mode, falsey users preserve
+                # the existing lenient path.
+                session = _get_session_or_404(db, req.session_id, user)

            doc_id = str(uuid.uuid4())
            ver_id = str(uuid.uuid4())
@@ -90,10 +108,10 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            # to markdown for prose.
            language = req.language
            if not language:
-                from src.tool_implementations import _looks_like_email_document, _sniff_doc_language
+                from src.agent_tools.document_tools import _looks_like_email_document, _sniff_doc_language
                language = _sniff_doc_language(req.content)
            else:
-                from src.tool_implementations import _looks_like_email_document
+                from src.agent_tools.document_tools import _looks_like_email_document
            if _looks_like_email_document(req.content, req.title):
                language = "email"

@@ -171,11 +189,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        if session_id:
            db = SessionLocal()
            try:
-                sess = db.query(DbSession).filter(DbSession.id == session_id).first()
-                if not sess:
-                    raise HTTPException(404, "Session not found")
-                if user and sess.owner and sess.owner != user:
-                    raise HTTPException(403, "Cannot import into another user's session")
+                _get_session_or_404(db, session_id, user)
            finally:
                db.close()

@@ -198,7 +212,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:

        title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0]
        try:
-            body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
+            body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
        except Exception:
            body_text = None

@@ -260,18 +274,29 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        db = SessionLocal()
        try:
            from sqlalchemy import or_
+            pdf_marker_cond = or_(
+                Document.current_content.like('%<!-- pdf_source upload_id="%'),
+                Document.current_content.like('%<!-- pdf_form_source upload_id="%'),
+            )
+            library_language_expr = case(
+                (pdf_marker_cond, "pdf"),
+                (Document.language.is_(None), "text"),
+                else_=Document.language,
+            )
            # Archived view shows ONLY archived docs; the default view excludes
            # them (NULL = legacy rows that predate the column = not archived).
            _arch_cond = (Document.archived == True) if archived else or_(
                Document.archived == False, Document.archived.is_(None))
-            # Language facet counts (owner-filtered)
+            # Language facet counts (owner-filtered). PDF documents are stored
+            # as markdown wrappers, so group by the library display language
+            # instead of the raw stored language.
            lang_q = (
-                db.query(Document.language, func.count(Document.id))
+                db.query(library_language_expr, func.count(Document.id))
                .outerjoin(DbSession, Document.session_id == DbSession.id)
                .filter(Document.is_active == True).filter(_arch_cond)
            )
            lang_q = _owner_session_filter(lang_q, user)
-            lang_rows = lang_q.group_by(Document.language).all()
+            lang_rows = lang_q.group_by(library_language_expr).all()
            languages = _aggregate_language_facets(lang_rows)

            # Session count (owner-filtered)
@@ -303,12 +328,17 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                        Document.title.ilike(term) | Document.current_content.ilike(term)
                    )

-            # Language filter
+            # Language filter. "pdf" is a display language derived from the
+            # source marker; "markdown" excludes those wrappers.
            if language:
                if language == "text":
                    q = q.filter((Document.language == None) | (Document.language == "text"))
+                elif language == "pdf":
+                    q = q.filter(pdf_marker_cond)
                else:
                    q = q.filter(Document.language == language)
+                    if language == "markdown":
+                        q = q.filter(~pdf_marker_cond)

            # Total before pagination
            total = q.count()
@@ -332,7 +362,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                    "session_id": doc.session_id,
                    "session_name": session_name,
                    "title": doc.title,
-                    "language": doc.language or "text",
+                    "language": _library_language_for_document(doc),
                    "preview": (doc.current_content or "")[:500],
                    "version_count": doc.version_count,
                    "created_at": (doc.created_at.isoformat() + "Z") if doc.created_at else None,
@@ -359,18 +389,17 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        try:
            if not user:
                raise HTTPException(403, "Authentication required")
-            session = db.query(DbSession).filter(DbSession.id == session_id).first()
            # v2 review HIGH-9: raise 403 explicitly when the caller
            # can't see this session, instead of returning [] which the
            # UI treats identically to "no docs" and silently masks
            # auth failures.
-            if not session:
-                raise HTTPException(404, "Session not found")
-            if user and session.owner and session.owner != user:
-                raise HTTPException(403, "Access denied")
-            docs = db.query(Document).filter(
+            _get_session_or_404(db, session_id, user)
+            q = db.query(Document).filter(
                Document.session_id == session_id
-            ).order_by(Document.created_at.desc()).all()
+            )
+            if user:
+                q = q.filter(or_(Document.owner == user, Document.owner.is_(None)))
+            docs = q.order_by(Document.created_at.desc()).all()
            return [_doc_to_dict(d) for d in docs]
        finally:
            db.close()
@@ -437,7 +466,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                raise HTTPException(404, "Source PDF could not be located")

            try:
-                body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
+                body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user))
            except Exception as e:
                logger.error(f"extract_pdf_text failed for {pdf_path}: {e}")
                raise HTTPException(500, f"Extraction failed: {e}")
@@ -474,7 +503,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        user = get_current_user(request)
        try:
            data = await request.json()
-        except Exception:
+        except Exception as e:
+            logger.warning("Failed to parse export request body, defaulting to empty", exc_info=e)
            data = {}
        ids = data.get("ids") or []
        if not ids:
@@ -606,16 +636,18 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                doc.language = req.language
            if req.session_id is not None:
                # Empty string = unlink from session
+                if req.session_id:
+                    _get_session_or_404(db, req.session_id, user)
                doc.session_id = req.session_id if req.session_id else None
                if not req.session_id:
                    # Tab closed / doc detached from its session — drop the
                    # in-memory active-doc pointer so the last-resort injection
                    # path doesn't re-surface this doc in a later chat (#1160).
                    try:
-                        from src.tool_implementations import clear_active_document
+                        from src.agent_tools.document_tools import clear_active_document
                        clear_active_document(doc_id)
-                    except Exception:
-                        pass
+                    except Exception as e:
+                        logger.warning("Failed to clear active document %r on detach", doc_id, exc_info=e)
            db.commit()
            db.refresh(doc)
            return _doc_to_dict(doc)
@@ -641,7 +673,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            # Closed/deleted — drop the in-memory active-doc pointer so it isn't
            # re-injected into a later, unrelated chat (#1160).
            try:
-                from src.tool_implementations import clear_active_document
+                from src.agent_tools.document_tools import clear_active_document
                clear_active_document(doc_id)
            except Exception:
                pass
@@ -663,8 +695,9 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        try:
            # Verify ownership before listing versions
            doc = db.query(Document).filter(Document.id == doc_id).first()
-            if doc:
-                _verify_doc_owner(db, doc, user)
+            if not doc:
+                raise HTTPException(404, "Document not found")
+            _verify_doc_owner(db, doc, user)
            versions = db.query(DocumentVersion).filter(
                DocumentVersion.document_id == doc_id
            ).order_by(DocumentVersion.version_number.desc()).all()
@@ -687,8 +720,9 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        try:
            # Verify ownership
            doc = db.query(Document).filter(Document.id == doc_id).first()
-            if doc:
-                _verify_doc_owner(db, doc, user)
+            if not doc:
+                raise HTTPException(404, "Document not found")
+            _verify_doc_owner(db, doc, user)
            ver = db.query(DocumentVersion).filter(
                DocumentVersion.document_id == doc_id,
                DocumentVersion.version_number == num,
@@ -853,10 +887,10 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        from src.llm_core import llm_call_async

        user = get_current_user(request)
-        url, model, headers = resolve_task_endpoint()
+        url, model, headers = resolve_task_endpoint(owner=user or None)
        if not url or not model:
            # Fall back to default endpoint
-            url, model, headers = resolve_endpoint("default")
+            url, model, headers = resolve_endpoint("default", owner=user or None)
        if not url or not model:
            raise HTTPException(500, "No endpoint configured for AI tidy")

@@ -1156,7 +1190,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        settings = _load_vl_settings()
        vl_model = settings.get("vision_model", "")
        try:
-            url, model_id, headers = _resolve_vl_model(vl_model)
+            url, model_id, headers = _resolve_vl_model(vl_model, owner=user)
        except Exception as e:
            raise HTTPException(503, f"No vision model available: {e}")

@@ -1510,10 +1544,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        # don't import from a routes file (cycle-prone). Same env override
        # as email_routes (ODYSSEUS_MAIL_ATTACHMENTS_DIR).
        from pathlib import Path as _Path
-        import os as _os
-        _DATA_DIR = _Path(__file__).resolve().parent.parent / "data"
-        _BASE = _os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(_DATA_DIR / "mail-attachments"))
-        _COMPOSE_DIR = _Path(_BASE) / "_compose"
+        _COMPOSE_DIR = _Path(MAIL_ATTACHMENTS_DIR) / "_compose"
        _COMPOSE_DIR.mkdir(parents=True, exist_ok=True)

        user = get_current_user(request)
@@ -1629,9 +1660,11 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            #    context (To/Subject/In-Reply-To/References).
            try:
                from routes.email_routes import _imap, _decode_header
+                from routes.email_helpers import _q
            except Exception:
                _imap = None
                _decode_header = lambda x: x or ""
+                _q = lambda x: x or ""

            to_addr = ""
            from_name = ""
@@ -1641,7 +1674,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            if _imap:
                try:
                    with _imap(doc.source_email_account_id or None) as conn:
-                        conn.select(doc.source_email_folder, readonly=True)
+                        conn.select(_q(doc.source_email_folder), readonly=True)
                        status, data = conn.fetch(doc.source_email_uid.encode(), "(RFC822.HEADER)")
                    if status == "OK" and data and data[0]:
                        raw_hdr = data[0][1]
@@ -13,6 +13,8 @@ and `email_pollers.py` (the background loops):
 """

 import os
+import base64
+import time
 import imaplib
 import smtplib
 import email as email_mod
@@ -38,6 +40,106 @@ from src.secret_storage import decrypt as _decrypt
 logger = logging.getLogger(__name__)


+def _xoauth2_raw(user: str, access_token: str) -> str:
+    """Build the unencoded SASL XOAUTH2 initial response.
+
+    Callers hand this raw text to smtplib.SMTP.auth() / imaplib's
+    authenticate() callback as-is: both libraries apply base64 themselves, so
+    encoding it here would end up double-encoded.
+    """
+    template = "user={}\x01auth=Bearer {}\x01\x01"
+    return template.format(user, access_token)
+
+
+def _xoauth2_bytes(user: str, access_token: str) -> bytes:
+    """UTF-8 bytes of the raw XOAUTH2 response, for imaplib's authenticate()."""
+    raw = _xoauth2_raw(user, access_token)
+    return raw.encode("utf-8")
+
+
+def make_oauth_state(account_id: str, owner: str) -> str:
+    """Create a signed OAuth ``state`` value for an account/owner pair.
+
+    The token is urlsafe-base64 of ``<json payload>|<hex HMAC-SHA256>``, where
+    the payload holds the account id, the owner and a random nonce and the
+    HMAC key is the app secret. The callback can therefore check that the
+    state was issued by this application (CSRF / state-forgery protection).
+    """
+    import hashlib
+    import hmac
+    import secrets
+    from src.secret_storage import _load_or_create_key
+
+    fields = {"a": account_id, "o": owner, "n": secrets.token_hex(16)}
+    payload = json.dumps(fields, separators=(",", ":"))
+    digest = hmac.new(_load_or_create_key(), payload.encode(), hashlib.sha256)
+    signed = f"{payload}|{digest.hexdigest()}"
+    return base64.urlsafe_b64encode(signed.encode()).decode()
+
+
+def verify_oauth_state(state: str) -> dict | None:
+    """Check the signature on an OAuth ``state`` token and decode it.
+
+    Returns the payload dict ({"a", "o", "n"}) when the HMAC matches. Returns
+    None for anything else: undecodable input, a missing separator, a
+    signature mismatch (tampering or a different key), or invalid JSON.
+    """
+    import hashlib
+    import hmac
+    from src.secret_storage import _load_or_create_key
+
+    try:
+        text = base64.urlsafe_b64decode(state.encode()).decode()
+        payload, given_sig = text.rsplit("|", 1)
+        wanted_sig = hmac.new(
+            _load_or_create_key(), payload.encode(), hashlib.sha256
+        ).hexdigest()
+        # Constant-time comparison of the supplied and recomputed signatures.
+        return json.loads(payload) if hmac.compare_digest(given_sig, wanted_sig) else None
+    except Exception:
+        return None
+
+
+def _refresh_google_token(account_id: str) -> str | None:
+    """Exchange the stored refresh token for a new access token and persist it.
+
+    Returns the fresh access token, or None when the Google OAuth client
+    credentials are not configured, the account has no usable refresh token,
+    or the refresh request / database write fails (logged with its cause).
+    """
+    import httpx
+    from core.database import SessionLocal as _SL, EmailAccount as _EA
+    from src.secret_storage import encrypt as _enc, decrypt as _dec
+    client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
+    client_secret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
+    if not client_id or not client_secret:
+        return None
+    db = _SL()
+    try:
+        row = db.get(_EA, account_id)
+        if not row or not row.oauth_refresh_token:
+            return None
+        refresh_token = _dec(row.oauth_refresh_token or "")
+        if not refresh_token:
+            return None
+        resp = httpx.post("https://oauth2.googleapis.com/token", data={
+            "client_id": client_id,
+            "client_secret": client_secret,
+            "refresh_token": refresh_token,
+            "grant_type": "refresh_token",
+        }, timeout=10)
+        resp.raise_for_status()
+        data = resp.json()
+        access_token = data["access_token"]
+        # Normalise expires_in to an int. A string or null value would
+        # otherwise raise and discard a token that was obtained successfully;
+        # a float would be stored in a form the expiry check cannot parse.
+        try:
+            lifetime = int(data.get("expires_in", 3600))
+        except (TypeError, ValueError):
+            lifetime = 3600
+        row.oauth_access_token = _enc(access_token)
+        row.oauth_token_expiry = str(int(time.time()) + lifetime)
+        db.commit()
+        return access_token
+    except Exception as e:
+        # Attach the exception: without it a revoked token, a network error
+        # and a database failure all produce the same log line.
+        logger.warning("Google token refresh failed for account %s", account_id, exc_info=e)
+        return None
+    finally:
+        db.close()
+
+
+def _get_valid_google_token(account_id: str, cfg: dict) -> str | None:
+    """Reuse the cached Google access token if still fresh, else refresh it.
+
+    The cached token counts as fresh only while its stored expiry is more
+    than 60 seconds away; a missing token, a missing expiry or an unparsable
+    expiry all fall through to a refresh.
+    """
+    from src.secret_storage import decrypt as _dec
+    cached = _dec(cfg.get("oauth_access_token") or "")
+    raw_expiry = cfg.get("oauth_token_expiry") or ""
+    still_fresh = False
+    if cached and raw_expiry:
+        try:
+            still_fresh = int(raw_expiry) - 60 > time.time()
+        except (ValueError, TypeError):
+            still_fresh = False
+    return cached if still_fresh else _refresh_google_token(account_id)
+
+
 def _smtp_security_mode(cfg: dict) -> str:
    raw = str(cfg.get("smtp_security") or "").strip().lower()
    if raw in {"ssl", "starttls", "none"}:
@@ -54,23 +156,64 @@ def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message
    port = int(cfg.get("smtp_port") or 465)
    user = cfg.get("smtp_user") or ""
    password = cfg.get("smtp_password") or ""
+
+    def _auth_smtp(smtp):
+        if cfg.get("oauth_provider") == "google":
+            token = _get_valid_google_token(cfg.get("account_id"), cfg)
+            if not token:
+                raise RuntimeError("Google OAuth token unavailable — reconnect the account")
+            smtp.ehlo()
+            smtp.auth("XOAUTH2", lambda challenge=None: _xoauth2_raw(user, token), initial_response_ok=True)
+        elif user and password:
+            smtp.login(user, password)
+
    security = _smtp_security_mode(cfg)

    if security == "ssl":
        with smtplib.SMTP_SSL(host, port, timeout=timeout) as smtp:
-            if user and password:
-                smtp.login(user, password)
+            _auth_smtp(smtp)
            smtp.sendmail(from_addr, recipients, message)
        return

    with smtplib.SMTP(host, port, timeout=timeout) as smtp:
        if security == "starttls":
            smtp.starttls()
-        if user and password:
-            smtp.login(user, password)
+        _auth_smtp(smtp)
        smtp.sendmail(from_addr, recipients, message)


+def _friendly_email_auth_error(protocol: str, host: str, error: object) -> str:
+    """Translate known provider auth rejections into an actionable message.
+
+    Microsoft's basic-auth refusals are recognised either by their explicit
+    markers in the error text or by a generic auth failure on a Microsoft
+    mail host. Every other error comes back as its first 200 characters.
+    ``protocol`` is accepted but does not influence the result.
+    """
+    raw = str(error or "")
+    text = raw.lower()
+    hostname = (host or "").lower()
+
+    on_microsoft_host = False
+    for marker in (
+        "outlook.office365.com",
+        "smtp.office365.com",
+        "office365.com",
+        "outlook.com",
+        "hotmail.com",
+        "live.com",
+    ):
+        if marker in hostname:
+            on_microsoft_host = True
+            break
+
+    if "5.7.139" in text or "basic authentication is disabled" in text:
+        basic_auth_rejected = True
+    elif on_microsoft_host:
+        basic_auth_rejected = (
+            "authenticate failed" in text or "authentication unsuccessful" in text
+        )
+    else:
+        basic_auth_rejected = False
+
+    if not basic_auth_rejected:
+        return raw[:200]
+    return (
+        "Microsoft no longer accepts normal mailbox passwords for "
+        "Outlook/Office 365 IMAP/SMTP in most accounts. Odysseus "
+        "does not support Microsoft OAuth/Graph mail yet, so Outlook "
+        "accounts cannot be added with this password form."
+    )
+
+
 def _strip_think(text: str) -> str:
    """Email-flavored think strip — thin wrapper over the central helper.

@@ -254,16 +397,17 @@ def _cleanup_compose_uploads(tokens) -> None:
            pass


-DATA_DIR = Path(__file__).resolve().parent.parent / "data"
-SETTINGS_FILE = DATA_DIR / "settings.json"
+from src.constants import DATA_DIR as _DATA_DIR, MAIL_ATTACHMENTS_DIR, SETTINGS_FILE as _SETTINGS_FILE, SCHEDULED_EMAILS_DB
+DATA_DIR = Path(_DATA_DIR)
+SETTINGS_FILE = Path(_SETTINGS_FILE)
 # Override at deploy time via ODYSSEUS_MAIL_ATTACHMENTS_DIR. Defaults to a
 # subdir of the install's data/ tree so the app works out-of-the-box without
 # a hardcoded /home/<user>/ path.
-ATTACHMENTS_DIR = Path(os.environ.get("ODYSSEUS_MAIL_ATTACHMENTS_DIR", str(DATA_DIR / "mail-attachments")))
+ATTACHMENTS_DIR = Path(MAIL_ATTACHMENTS_DIR)
 ATTACHMENTS_DIR.mkdir(parents=True, exist_ok=True)
 COMPOSE_UPLOADS_DIR = ATTACHMENTS_DIR / "_compose"
 COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
-SCHEDULED_DB = DATA_DIR / "scheduled_emails.db"
+SCHEDULED_DB = Path(SCHEDULED_EMAILS_DB)


 OWNER_SCOPED_EMAIL_CACHE_TABLES = {
@@ -271,6 +415,7 @@ OWNER_SCOPED_EMAIL_CACHE_TABLES = {
    "email_ai_replies",
    "email_calendar_extractions",
    "email_urgency_alerts",
+    "sender_signatures",
 }


@@ -308,6 +453,55 @@ def _ensure_owner_scoped_email_cache_table(conn, table: str, create_sql: str, co
        _lg.getLogger(__name__).warning(f"{table} owner-migration skipped: {_mig_e}")


+def _ensure_sender_signatures_table(conn):
+    """Ensure `sender_signatures` exists with the (from_address, owner) primary key.
+
+    A table that lacks the `owner` column or that composite key is renamed
+    aside, recreated, refilled from the old rows and the old copy dropped.
+    Errors during the check or migration are logged as a warning and ignored.
+    """
+    ddl = """
+        CREATE TABLE IF NOT EXISTS sender_signatures (
+            from_address TEXT,
+            owner TEXT DEFAULT '',
+            signature_text TEXT,
+            sample_count INTEGER,
+            last_built_at TEXT NOT NULL,
+            model_used TEXT,
+            source TEXT,
+            PRIMARY KEY (from_address, owner)
+        )
+    """
+    conn.execute(ddl)
+    try:
+        table_info = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
+        column_names = [row[1] for row in table_info]
+        # table_info column 5 is the 1-based position within the primary key (0 = not in it).
+        key_rows = sorted((row for row in table_info if row[5]), key=lambda row: row[5])
+        already_scoped = (
+            "owner" in column_names
+            and [row[1] for row in key_rows] == ["from_address", "owner"]
+        )
+        if not already_scoped:
+            conn.execute("ALTER TABLE sender_signatures RENAME TO sender_signatures__old")
+            conn.execute(ddl)
+            legacy_cols = [row[1] for row in conn.execute("PRAGMA table_info(sender_signatures__old)").fetchall()]
+            portable = ("from_address", "signature_text", "sample_count", "last_built_at", "model_used", "source")
+            carried = [name for name in portable if name in legacy_cols]
+            # Rows from a table without an owner column are copied with owner ''.
+            owner_expr = "COALESCE(owner, '')" if "owner" in legacy_cols else "''"
+            target_list = ", ".join([*carried, "owner"])
+            select_list = ", ".join([*carried, owner_expr])
+            conn.execute(
+                f"INSERT OR IGNORE INTO sender_signatures ({target_list}) "
+                f"SELECT {select_list} FROM sender_signatures__old"
+            )
+            conn.execute("DROP TABLE sender_signatures__old")
+    except Exception as _mig_e:
+        import logging as _lg
+        _lg.getLogger(__name__).warning(f"sender_signatures owner-migration skipped: {_mig_e}")
+
+
 def attachment_extract_dir(folder: str, uid: str) -> Path:
    """Containment-safe extraction directory for an attachment.

@@ -526,20 +720,10 @@ def _init_scheduled_db():
            conn.execute("ALTER TABLE email_boundaries ADD COLUMN turns_json TEXT")
    except Exception:
        pass
-    # Per-sender signature cache. Populated by `learn_sender_signatures`
-    # action: the LLM extracts the common trailing block across N emails
-    # from each sender; the renderer folds it consistently for every
-    # future email from that address.
-    conn.execute("""
-        CREATE TABLE IF NOT EXISTS sender_signatures (
-            from_address TEXT PRIMARY KEY,
-            signature_text TEXT,
-            sample_count INTEGER,
-            last_built_at TEXT NOT NULL,
-            model_used TEXT,
-            source TEXT
-        )
-    """)
+    # Per-sender signature cache. Populated by `learn_sender_signatures`.
+    # Message sender addresses are global, so signatures must be scoped to the
+    # mailbox owner before `/read` returns them to the renderer.
+    _ensure_sender_signatures_table(conn)
    conn.commit()
    conn.close()

@@ -628,10 +812,16 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
                    "imap_password": _decrypt(row.imap_password or ""),
                    "imap_starttls": bool(row.imap_starttls),
                    "from_address": row.from_address or row.imap_user or "",
+                    "oauth_provider": row.oauth_provider or "",
+                    "oauth_access_token": row.oauth_access_token or "",
+                    "oauth_refresh_token": row.oauth_refresh_token or "",
+                    "oauth_token_expiry": row.oauth_token_expiry or "",
+                    "display_name": row.display_name or "",
                }
-                if not (cfg["smtp_host"] and cfg["smtp_user"] and cfg["smtp_password"]):
+                is_oauth = bool(cfg.get("oauth_provider"))
+                if not is_oauth and not (cfg["smtp_host"] and cfg["smtp_user"] and cfg["smtp_password"]):
                    logger.warning(f"SMTP not configured for account {row.name!r}")
-                if not (cfg["imap_host"] and cfg["imap_user"] and cfg["imap_password"]):
+                if not is_oauth and not (cfg["imap_host"] and cfg["imap_user"] and cfg["imap_password"]):
                    logger.warning(f"IMAP not configured for account {row.name!r}")
                return cfg
        finally:
@@ -705,7 +895,16 @@ def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int
    port = int(port or 993)
    if starttls:
        conn = imaplib.IMAP4(host, port, timeout=timeout)
-        conn.starttls()
+        try:
+            conn.starttls()
+        except Exception:
+            # Don't leak the open plain socket if the STARTTLS upgrade is
+            # rejected; close it before propagating. (#3174)
+            try:
+                conn.shutdown()
+            except Exception:
+                pass
+            raise
    elif port == 993:
        conn = imaplib.IMAP4_SSL(host, port, timeout=timeout)
    else:
@@ -714,12 +913,20 @@ def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int
        conn.sock.settimeout(timeout)
    except Exception:
        pass
+    # Raise the IMAP line-length limit from the default 1 MB to 50 MB so that
+    # large mailboxes (tens of thousands of messages) don't crash with
+    # "got more than 1000000 bytes" on UID SEARCH ALL.  (#2883)
+    imaplib._MAXLINE = 50_000_000
    return conn

-def _imap_connect(account_id: str | None = None, owner: str = ""):
+def _imap_connect(account_id: str | None = None, owner: str = "",
+                  timeout: int = _IMAP_TIMEOUT_SECONDS):
    # SECURITY: passing `owner` scopes the fallback config lookup so a brand
    # new user doesn't get connected against another user's default mailbox
    # when they have no account configured.
+    #
+    # `timeout` is overridable so short-lived callers (e.g. the service-health
+    # probe) can impose a tighter budget than the default IMAP timeout.
    cfg = _get_email_config(account_id, owner=owner)
    # Connection mode:
    #   STARTTLS on → plain + upgrade
@@ -732,9 +939,27 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
        cfg["imap_host"],
        cfg["imap_port"],
        starttls=bool(cfg.get("imap_starttls")),
-        timeout=_IMAP_TIMEOUT_SECONDS,
+        timeout=timeout,
    )
-    conn.login(cfg["imap_user"], cfg["imap_password"])
+    try:
+        if cfg.get("oauth_provider") == "google":
+            token = _get_valid_google_token(cfg.get("account_id"), cfg)
+            if not token:
+                raise RuntimeError("Google OAuth token unavailable — reconnect the account in Settings → Integrations")
+            conn.authenticate("XOAUTH2", lambda x: _xoauth2_bytes(cfg["imap_user"], token))
+        else:
+            conn.login(cfg["imap_user"], cfg["imap_password"])
+    except Exception:
+        # A failed AUTHENTICATE (e.g. an Office 365 app password on an
+        # MFA-enabled tenant, #3174, or an expired/revoked OAuth token)
+        # otherwise orphans the already-connected socket; close it before
+        # propagating so a misconfigured account can't leak one descriptor
+        # per retry / background poller pass.
+        try:
+            conn.shutdown()
+        except Exception:
+            pass
+        raise
    return conn


@@ -798,20 +1023,28 @@ def _imap(account_id: str | None = None, owner: str = ""):
 def _decode_header(raw):
    if not raw:
        return ""
-    parts = email.header.decode_header(raw)
-    decoded = []
-    for data, charset in parts:
-        if isinstance(data, bytes):
-            try:
-                decoded.append(data.decode(charset or "utf-8", errors="replace"))
-            except (LookupError, ValueError):
-                # Unknown/invalid MIME charset (e.g. a malformed or spam header
-                # like =?x-unknown-charset?B?...?=). errors="replace" only covers
-                # byte-decode errors, not codec lookup, so fall back to utf-8.
-                decoded.append(data.decode("utf-8", errors="replace"))
-        else:
-            decoded.append(data)
-    return " ".join(decoded)
+    try:
+        # make_header concatenates per RFC 2047: no spurious space between an
+        # encoded-word and adjacent plain text (plain runs keep their own
+        # whitespace), and the whitespace between two adjacent encoded-words is
+        # dropped. The old " ".join produced "Re:  Jose"-style double spaces on
+        # every non-ASCII subject or sender.
+        return str(email.header.make_header(email.header.decode_header(raw)))
+    except Exception:
+        # Malformed header or unknown/invalid MIME charset (e.g. a spam header
+        # like =?x-unknown-charset?B?...?=) makes make_header raise LookupError;
+        # fall back to a lossy per-part decode. errors="replace" only covers
+        # byte-decode errors, not codec lookup, hence the explicit utf-8 retry.
+        decoded = []
+        for data, charset in email.header.decode_header(raw):
+            if isinstance(data, bytes):
+                try:
+                    decoded.append(data.decode(charset or "utf-8", errors="replace"))
+                except (LookupError, ValueError):
+                    decoded.append(data.decode("utf-8", errors="replace"))
+            else:
+                decoded.append(data)
+        return "".join(decoded)


 def _detect_sent_folder(conn):
@@ -1136,13 +1369,9 @@ def _fetch_sender_thread_context(sender_addr: str,
    if exclude_uid:
        seen_uids.add((exclude_folder or "INBOX", str(exclude_uid)))

+    conn = None
    try:
        conn = _imap_connect(account_id, owner=owner)
-    except Exception as e:
-        logger.warning(f"sender-thread-context: imap connect failed: {e}")
-        return ""
-
-    try:
        for folder in ["INBOX", "Sent", "Archive", "Drafts"]:
            if len(blocks) >= limit:
                break
@@ -1209,11 +1438,14 @@ def _fetch_sender_thread_context(sender_addr: str,
                if atts_text:
                    lines.append(atts_text)
                blocks.append("\n".join(lines))
+    except Exception as e:
+        logger.warning(f"sender-thread-context: imap failed: {e}")
    finally:
-        try: conn.close()
-        except Exception: pass
-        try: conn.logout()
-        except Exception: pass
+        if conn:
+            try: conn.close()
+            except Exception: pass
+            try: conn.logout()
+            except Exception: pass

    if not blocks:
        return ""
@@ -1316,6 +1548,7 @@ def _pre_retrieve_context(
        if not terms_list:
            return context_snippets, terms_list

+        ctx_conn = None
        try:
            ctx_conn = _imap_connect(account_id, owner=owner)
            for folder in ["INBOX", "Sent", "Archive", "Drafts"]:
@@ -1352,12 +1585,12 @@ def _pre_retrieve_context(
                    except Exception as _e:
                        logger.warning(f"  search {folder} {term!r} failed: {_e}")
                        continue
-            try:
-                ctx_conn.logout()
-            except Exception:
-                pass
        except Exception as _e:
            logger.warning(f"IMAP context search failed: {_e}")
+        finally:
+            if ctx_conn:
+                try: ctx_conn.logout()
+                except Exception: pass

        try:
            from routes.contacts_routes import _fetch_contacts
@@ -210,7 +210,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
        if auto_cal:
            for sent_name in ("Sent", "INBOX/Sent", "Sent Items", "[Gmail]/Sent Mail"):
                try:
-                    st, _ = conn.select(sent_name, readonly=True)
+                    st, _ = conn.select(_q(sent_name), readonly=True)
                    if st == "OK":
                        folders_to_scan.append(sent_name)
                        break
@@ -1046,7 +1046,7 @@ def _scheduled_poll_once() -> dict:
                try:
                    with _imap(row_account_id, owner=row_owner) as imap:
                        sent_folder = _detect_sent_folder(imap)
-                        imap.append(sent_folder, "\\Seen", None, outer.as_bytes())
+                        imap.append(_q(sent_folder), "\\Seen", None, outer.as_bytes())
                except Exception as e:
                    logger.warning(f"Failed to append scheduled {sid} to Sent: {e}")

@@ -13,7 +13,9 @@ handlers need. The split is mechanical — no behavior change.
 """

 import asyncio
+import os
 import sqlite3 as _sql3
+import time
 import email as email_mod
 import email.header
 import email.utils
@@ -32,9 +34,10 @@ from email.mime.multipart import MIMEMultipart

 from fastapi import APIRouter, Query, UploadFile, File, BackgroundTasks, HTTPException, Depends, Request
 from fastapi.responses import FileResponse
+from src.constants import DATA_DIR

 from src.llm_core import llm_call_async
-from src.upload_limits import read_upload_limited
+from src.upload_limits import read_upload_limited, EMAIL_COMPOSE_UPLOAD_MAX_BYTES

 from routes.email_helpers import (
    _strip_think, _extract_reply, _apply_email_style_mechanics, require_owner, require_user, _assert_owns_account,
@@ -42,11 +45,13 @@ from routes.email_helpers import (
    _load_settings, _save_settings, _get_email_config,
    _send_smtp_message, _smtp_security_mode,
    _IMAP_TIMEOUT_SECONDS, _open_imap_connection,
+    make_oauth_state, verify_oauth_state,
    _imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
    _extract_attachment_text, _list_attachments_from_msg,
    _extract_attachment_to_disk, _extract_html, _extract_text,
    _fetch_sender_thread_context, _pre_retrieve_context,
    _EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
+    _friendly_email_auth_error,
    SendEmailRequest, ExtractStyleRequest,
    ATTACHMENTS_DIR, COMPOSE_UPLOADS_DIR, SCHEDULED_DB,
    attachment_extract_dir, _email_cache_owner_clause,
@@ -56,7 +61,6 @@ from routes.email_pollers import _start_poller
 logger = logging.getLogger(__name__)

 ODYSSEUS_MAIL_ORIGIN = "odysseus-ui"
-EMAIL_COMPOSE_UPLOAD_MAX_BYTES = 25 * 1024 * 1024


 def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[str]:
@@ -75,15 +79,16 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st
                        cfg.get("smtp_user") or "",
                        cfg.get("from_address") or "",
                    ])
-                except Exception:
+                except Exception as _e:
+                    logger.warning("Failed to resolve email account alias", exc_info=_e)
                    resolved_account_id = None
            row = db.get(_EA, resolved_account_id) if resolved_account_id else None
            if row:
                aliases.extend([row.owner or "", row.imap_user or "", row.from_address or ""])
        finally:
            db.close()
-    except Exception:
-        pass
+    except Exception as _e:
+        logger.warning("Failed to load email aliases", exc_info=_e)
    out = []
    for a in aliases:
        a = (a or "").strip()
@@ -248,8 +253,45 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
    return m.group(1).decode() if m else ""


+_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
+
+
+def _group_uid_fetch_records(msg_data) -> list:
+    """Collapse an imaplib UID FETCH response into per-message (meta, payload) pairs.
+
+    The response mixes ``(meta, literal)`` tuples with bare ``bytes`` items
+    for whatever the server sent outside a literal. Placement is
+    server-specific: Dovecot puts FLAGS before the header literal (inside
+    the tuple meta), Gmail puts it after, as a bare ``b' FLAGS (\\Seen))'``
+    item, so dropping bare items loses FLAGS on Gmail.
+
+    A tuple whose meta matches ``_FETCH_SEQ_RE`` opens a record. Every later
+    tuple or bare item is appended to that record's meta (space-joined); a
+    later tuple's literal is used only if the record has no payload yet.
+    Items before the first record are ignored; ``b')'`` is folded in too.
+    """
+    records: list = []  # each entry: (meta_bytes, payload_bytes_or_None)
+    for item in (msg_data or []):
+        if isinstance(item, tuple):
+            head = item[0]
+            if not isinstance(head, (bytes, bytearray)):
+                head = str(head).encode()
+            if _FETCH_SEQ_RE.match(head):
+                records.append((head, item[1]))
+            elif records:
+                meta, payload = records[-1]
+                records[-1] = (meta + b" " + head, payload or item[1])
+            continue
+        if records and isinstance(item, (bytes, bytearray)):
+            meta, payload = records[-1]
+            records[-1] = (meta + b" " + bytes(item), payload)
+    return records
+
+
 def _smtp_ready(cfg: dict) -> bool:
-    return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
+    if not cfg.get("smtp_host") or not cfg.get("smtp_user"):
+        return False
+    return bool(cfg.get("smtp_password") or cfg.get("oauth_provider"))


 def _resolve_send_config(account_id: str | None = None, owner: str = "") -> dict:
@@ -798,20 +840,11 @@ def setup_email_routes():
                except Exception as e:
                    logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
                    status, msg_data = "NO", []
-                # imaplib batch responses interleave (meta, payload) tuples and
-                # `b')'` terminators. Group by message: each tuple where the
-                # meta begins with a seq number starts a new message record.
-                seq_re = re.compile(rb'^(\d+)\s+\(')
-                grouped = []  # list of (meta_str, payload_bytes)
-                for part in (msg_data or []):
-                    if isinstance(part, tuple):
-                        meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
-                        if seq_re.match(meta_b):
-                            grouped.append((meta_b, part[1]))
-                        elif grouped:
-                            # continuation of previous message — concatenate meta info if any
-                            cur_meta, cur_payload = grouped[-1]
-                            grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
+                # Group the batched response into per-message (meta, payload)
+                # records. Bare bytes parts must be kept: Gmail returns FLAGS
+                # after the header literal as a bare element, and dropping it
+                # rendered every Gmail message as unread/unflagged.
+                grouped = _group_uid_fetch_records(msg_data)

                if status != "OK" and not grouped:
                    conn.logout()
@@ -1060,14 +1093,22 @@ def setup_email_routes():
            return {"contacts": [], "error": "Mail operation failed"}

    @router.get("/search")
-    async def search_emails(
+    # Sync def: the body is blocking IMAP I/O with no awaits. As `async def` it ran
+    # directly on the event loop and stalled the whole app during a search; as a sync
+    # def FastAPI runs it in a threadpool, keeping the loop responsive.
+    def search_emails(
        q: str = Query(""),
        folder: str = Query("INBOX"),
        limit: int = Query(50),
        account_id: str | None = Query(None),
        owner: str = Depends(require_owner),
    ):
-        """Search emails server-side via IMAP SEARCH. Matches subject, from, or body text."""
+        """Search emails server-side via IMAP SEARCH. Matches subject, from, or body text.
+
+        When the caller asks for INBOX and the account has an "All Mail"
+        folder (Gmail does), we transparently swap to All Mail so the
+        search surfaces archived / labelled emails too. Plain IMAP
+        accounts fall back to whatever folder the caller specified."""
        if not q or len(q) < 2:
            return {"emails": [], "total": 0, "query": q}
        # CRLF in q would terminate the IMAP command early — reject defensively.
@@ -1075,7 +1116,27 @@ def setup_email_routes():
            raise HTTPException(400, "Invalid query")
        try:
            with _imap(account_id, owner=owner) as conn:
-                conn.select(_q(folder), readonly=True)
+                # If the user asked for INBOX, try to upgrade to All Mail —
+                # one folder == every email on Gmail-class servers.
+                effective_folder = folder
+                if (folder or "").upper() == "INBOX":
+                    try:
+                        status, folder_lines = conn.list()
+                        if status == "OK" and folder_lines:
+                            for raw in folder_lines:
+                                if isinstance(raw, bytes):
+                                    raw = raw.decode("utf-8", errors="replace")
+                                m = re.match(r"\((?P<flags>[^)]*)\)\s+\"[^\"]*\"\s+(?P<name>.+)", raw)
+                                if not m:
+                                    continue
+                                flags = (m.group("flags") or "").lower()
+                                name = m.group("name").strip().strip('"')
+                                if "\\all" in flags or "all mail" in name.lower():
+                                    effective_folder = name
+                                    break
+                    except Exception:
+                        pass
+                conn.select(_q(effective_folder), readonly=True)

                # Escape backslash and quote for the IMAP-SEARCH quoted-string.
                q_escaped = q.replace('\\', '\\\\').replace('"', '\\"')
@@ -1083,7 +1144,7 @@ def setup_email_routes():

                status, data = _imap_uid_search(conn, search_cmd)
                if status != "OK" or not data[0]:
-                    return {"emails": [], "total": 0, "query": q}
+                    return {"emails": [], "total": 0, "query": q, "folder": effective_folder}

                uid_list = data[0].split()
                total = len(uid_list)
@@ -1097,14 +1158,15 @@ def setup_email_routes():
                            continue
                        raw_header = None
                        flags = ""
-                        for part in msg_data:
-                            if isinstance(part, tuple):
-                                meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
-                                if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
-                                    raw_header = part[1]
-                                flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
-                                if flag_match:
-                                    flags = flag_match.group(1)
+                        # Same Gmail caveat as the list route: FLAGS may
+                        # arrive after the header literal, so group bare
+                        # parts back into the message meta before scanning.
+                        for meta_b, payload in _group_uid_fetch_records(msg_data):
+                            if payload and b"RFC822.HEADER" in meta_b:
+                                raw_header = payload
+                            flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
+                            if flag_match:
+                                flags = flag_match.group(1).decode(errors="replace")
                        if not raw_header:
                            continue
                        msg = email_mod.message_from_bytes(raw_header)
@@ -1147,6 +1209,13 @@ def setup_email_routes():
                            "is_flagged": "\\Flagged" in flags,
                            "flags": flags,
                            "has_attachments": has_attachments,
+                            # Stamp the folder so the frontend opens each
+                            # email from the folder it actually lives in
+                            # (the search may have run against All Mail
+                            # even though the caller asked for INBOX),
+                            # otherwise clicks open whatever happens to
+                            # have the same UID in INBOX → wrong email.
+                            "folder": effective_folder,
                        })
                    except Exception as e:
                        logger.warning(f"Error parsing search result {uid}: {e}")
@@ -1246,8 +1315,9 @@ def setup_email_routes():
                try:
                    if sender_addr:
                        _rs = _c.execute(
-                            "SELECT signature_text FROM sender_signatures WHERE from_address = ?",
-                            (sender_addr.lower().strip(),),
+                            f"SELECT signature_text FROM sender_signatures "
+                            f"WHERE from_address = ? AND {owner_clause}",
+                            (sender_addr.lower().strip(), *owner_params),
                        ).fetchone()
                        if _rs and _rs[0]:
                            cached_sender_sig = _rs[0]
@@ -1692,6 +1762,22 @@ def setup_email_routes():
            logger.error(f"Failed to mark unread {uid}: {e}")
            return {"success": False, "error": "Mail operation failed"}

+    @router.post("/flag/{uid}")
+    # Sync def: blocking IMAP I/O with no awaits, so FastAPI runs it in a threadpool.
+    def flag_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None),
+                   on: bool = Query(True), owner: str = Depends(require_owner)):
+        """Set (`on=true`) or clear (`on=false`) the \\Flagged flag, a.k.a. favorite / star."""
+        try:
+            with _imap(account_id, owner=owner) as conn:
+                conn.select(_q(folder))
+                if not _store_email_flag(conn, uid, "\\Flagged", add=bool(on)):
+                    return {"success": False, "error": "Email not found"}
+            _invalidate_list_cache(account_id, folder)
+            return {"success": True, "flagged": bool(on)}
+        except Exception as e:
+            logger.error(f"Failed to flag {uid}: {e}")
+            return {"success": False, "error": "Mail operation failed"}
+
    @router.post("/mark-read/{uid}")
    async def mark_read(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
        """Mark an email as read (set \\Seen flag)."""
@@ -1707,7 +1793,9 @@ def setup_email_routes():
            return {"success": False, "error": "Mail operation failed"}

    @router.post("/archive/{uid}")
-    async def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
+    # Sync def: blocking IMAP I/O with no awaits — see search_emails above. Runs in a
+    # threadpool instead of blocking the event loop.
+    def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
        """Move email to Archive folder."""
        try:
            with _imap(account_id, owner=owner) as conn:
@@ -1939,7 +2027,7 @@ def setup_email_routes():
            outer = MIMEMultipart("alternative")
            body_container = outer

-        outer["From"] = cfg["from_address"]
+        outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
        outer["To"] = to
        if cc:
            outer["Cc"] = cc
@@ -2070,6 +2158,79 @@ def setup_email_routes():
            logger.error(f"cancel_scheduled {sid!r} failed: {e}")
            return {"success": False, "error": "Mail operation failed"}

+    # ── Agent send-confirm: list/approve/cancel ──────────────────────────
+    # When `agent_email_confirm` is on, the MCP send_email tool drops the
+    # composed email into scheduled_emails with status='agent_draft' (a
+    # far-future send_at so the poller never picks it up). These endpoints
+    # let the chat UI surface them for the user and either approve (flip
+    # to status='pending' with send_at=now so the poller delivers it) or
+    # cancel (status='cancelled').
+    @router.get("/pending")
+    async def list_pending_agent_drafts(owner: str = Depends(require_owner)):
+        """List agent-staged drafts (status 'agent_draft') owned by the caller or unowned."""
+        import sqlite3
+        from contextlib import closing
+        try:
+            with closing(sqlite3.connect(SCHEDULED_DB)) as conn:  # closed even if the query raises
+                conn.row_factory = sqlite3.Row
+                # The MCP server can't easily set owner, so it stores '' — include those rows too.
+                rows = conn.execute(
+                    """SELECT id, to_addr, subject, body, created_at, account_id
+                       FROM scheduled_emails
+                       WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
+                       ORDER BY created_at DESC""",
+                    (owner or "",),
+                ).fetchall()
+            return {"pending": [dict(r) for r in rows]}
+        except Exception as e:
+            logger.error(f"list_pending_agent_drafts failed: {e}")
+            return {"pending": [], "error": "Mail operation failed"}
+
+    @router.post("/pending/{sid}/approve")
+    async def approve_agent_draft(sid: str, owner: str = Depends(require_owner)):
+        """Approve a draft staged by the agent: flip status → pending and set
+        send_at to the current UTC time so the scheduled-send poller can pick
+        it up. Returns {"success": False, ...} when no matching draft exists."""
+        import sqlite3
+        from contextlib import closing
+        try:
+            with closing(sqlite3.connect(SCHEDULED_DB)) as conn:  # closed even if the UPDATE raises
+                cur = conn.execute(
+                    """UPDATE scheduled_emails
+                       SET status = 'pending', send_at = ?
+                       WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
+                    (datetime.utcnow().isoformat(), sid, owner or ""),
+                )
+                conn.commit()
+                affected = cur.rowcount
+            if not affected:
+                return {"success": False, "error": "Draft not found or already handled"}
+            return {"success": True}
+        except Exception as e:
+            logger.error(f"approve_agent_draft {sid!r} failed: {e}")
+            return {"success": False, "error": "Mail operation failed"}
+
+    @router.delete("/pending/{sid}")
+    async def cancel_agent_draft(sid: str, owner: str = Depends(require_owner)):
+        """Discard a draft the agent staged for approval."""
+        import sqlite3
+        from contextlib import closing
+        try:
+            with closing(sqlite3.connect(SCHEDULED_DB)) as conn:  # closed even if the UPDATE raises
+                cur = conn.execute(
+                    """UPDATE scheduled_emails SET status = 'cancelled'
+                       WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
+                    (sid, owner or ""),
+                )
+                conn.commit()
+                affected = cur.rowcount
+            if not affected:
+                return {"success": False, "error": "Draft not found or already handled"}
+            return {"success": True}
+        except Exception as e:
+            logger.error(f"cancel_agent_draft {sid!r} failed: {e}")
+            return {"success": False, "error": "Mail operation failed"}
+
    @router.get("/resolve-contact")
    async def resolve_contact(name: str = Query(..., description="Name to search for"), owner: str = Depends(require_owner)):
        """Search Sent folder for a contact by name. Returns matching email addresses."""
@@ -2130,6 +2291,7 @@ def setup_email_routes():
        try:
            cfg = _resolve_send_config(req.account_id, owner=owner)
        except Exception as e:
+            logger.warning(f"No SMTP-capable account resolved: {e}")
            return {"success": False, "error": str(e) or "No SMTP-capable email account configured"}

        # Use 'mixed' if we have attachments, 'alternative' otherwise
@@ -2142,7 +2304,7 @@ def setup_email_routes():
            outer = MIMEMultipart("alternative")
            body_container = outer

-        outer["From"] = cfg["from_address"]
+        outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
        outer["To"] = req.to
        if req.cc:
            outer["Cc"] = req.cc
@@ -2193,6 +2355,10 @@ def setup_email_routes():

        _account_id = cfg.get("account_id") or req.account_id  # capture for the IMAP append in the closure
        _in_reply_to = (req.in_reply_to or "").strip()
+        _oauth_provider = cfg.get("oauth_provider") or ""
+        _oauth_access_token = cfg.get("oauth_access_token") or ""
+        _oauth_refresh_token = cfg.get("oauth_refresh_token") or ""
+        _oauth_token_expiry = cfg.get("oauth_token_expiry") or ""

        def _deliver():
            try:
@@ -2203,6 +2369,11 @@ def setup_email_routes():
                        "smtp_security": _smtp_security,
                        "smtp_user": _smtp_user,
                        "smtp_password": _smtp_pw,
+                        "account_id": _account_id,
+                        "oauth_provider": _oauth_provider,
+                        "oauth_access_token": _oauth_access_token,
+                        "oauth_refresh_token": _oauth_refresh_token,
+                        "oauth_token_expiry": _oauth_token_expiry,
                    },
                    _from,
                    _recipients,
@@ -2315,7 +2486,7 @@ def setup_email_routes():
            msg.attach(MIMEText(_draft_html, "html", "utf-8"))
        else:
            msg = MIMEText(req.body, "plain", "utf-8")
-        msg["From"] = cfg["from_address"]
+        msg["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
        msg["To"] = req.to
        if req.cc:
            msg["Cc"] = req.cc
@@ -2583,11 +2754,15 @@ def setup_email_routes():
            source_uid = (data.get("uid") or "").strip()
            source_folder = (data.get("folder") or "INBOX").strip()
            fast_reply = bool(data.get("fast", False))
+            user_hint = (data.get("user_hint") or "").strip()

            if not original_body:
                return {"success": False, "error": "No email body provided"}

-            if message_id:
+            # Skip cache lookup when the caller supplied a user_hint — the
+            # cached generic reply doesn't reflect the instructions and
+            # would silently override them.
+            if message_id and not user_hint:
                try:
                    _c = _sql3.connect(SCHEDULED_DB)
                    owner_clause, owner_params = _email_cache_owner_clause(owner)
@@ -2727,8 +2902,13 @@ def setup_email_routes():
            user_msg = (
                f"Recipient: {to}\nSubject: {subject}\n\n"
                f"Original email and any current draft:\n{original_body[:6000]}\n\n"
-                f"Draft a reply. Return only the reply body text."
            )
+            if user_hint:
+                user_msg += (
+                    f"User's instructions for THIS reply (follow these — they override "
+                    f"defaults like length/tone):\n{user_hint[:2000]}\n\n"
+                )
+            user_msg += "Draft a reply. Return only the reply body text."

            # Build a candidate chain so a stale session-stored API key
            # (the most common cause of "authentication failed" here)
@@ -2904,7 +3084,7 @@ def setup_email_routes():
        from pathlib import Path as _P
        import json as _json
        _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
-        path = _P(f"data/email_urgency_state_{_slug}.json")
+        path = _P(DATA_DIR) / f"email_urgency_state_{_slug}.json"
        if not path.exists():
            return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}}
        try:
@@ -2958,6 +3138,8 @@ def setup_email_routes():
                    "from_address": r.from_address or "",
                    "has_imap_password": bool(r.imap_password),
                    "has_smtp_password": bool(r.smtp_password),
+                    "oauth_provider": r.oauth_provider or "",
+                    "display_name": r.display_name or "",
                })
            return {"accounts": out}
        finally:
@@ -2990,6 +3172,7 @@ def setup_email_routes():
                smtp_user=(data.get("smtp_user") or "").strip(),
                smtp_password=_enc(data.get("smtp_password") or ""),
                from_address=(data.get("from_address") or "").strip(),
+                display_name=(data.get("display_name") or "").strip(),
                # SECURITY: stamp the creator so all subsequent reads / mutations
                # can filter by user. Without this every new account leaks to
                # every other user.
@@ -3024,7 +3207,7 @@ def setup_email_routes():
            if not row:
                return {"ok": False, "error": "Account not found"}
            # Simple fields
-            for key in ("name", "imap_host", "imap_user", "smtp_host", "smtp_user", "from_address"):
+            for key in ("name", "imap_host", "imap_user", "smtp_host", "smtp_user", "from_address", "display_name"):
                if key in data:
                    setattr(row, key, (data[key] or "").strip())
            for key in ("imap_port", "smtp_port"):
@@ -3162,7 +3345,7 @@ def setup_email_routes():
                    try: conn.logout()
                    except Exception: pass
            except Exception as e:
-                imap_result = {"ok": False, "error": str(e)[:200]}
+                imap_result = {"ok": False, "error": _friendly_email_auth_error("IMAP", imap_host, e)}

        smtp_host = (body.get("smtp_host") or "").strip()
        if smtp_host:
@@ -3184,7 +3367,7 @@ def setup_email_routes():
                    try: smtp.quit()
                    except Exception: pass
            except Exception as e:
-                smtp_result = {"ok": False, "error": str(e)[:200]}
+                smtp_result = {"ok": False, "error": _friendly_email_auth_error("SMTP", smtp_host, e)}

        return {
            "ok": imap_result["ok"] and (smtp_result is None or smtp_result["ok"]),
@@ -3213,4 +3396,123 @@ def setup_email_routes():
        finally:
            db.close()

+    # ── Google OAuth2 routes ──
+
+    @router.get("/oauth/google/authorize")
+    async def google_oauth_authorize(account_id: str = Query(...), request: Request = None, owner: str = Depends(require_user)):
+        import urllib.parse  # only needed here, to encode the authorization query string
+        _assert_owns_account(account_id, owner)  # NOTE(review): presumably raises if `owner` does not own the account — confirm
+        client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
+        if not client_id:
+            raise HTTPException(400, "GOOGLE_OAUTH_CLIENT_ID not set — add it to .env")
+        redirect_uri = (
+            os.environ.get("GOOGLE_OAUTH_REDIRECT_URI")  # explicit override wins
+            or f"http://{request.headers.get('host', 'localhost:7000')}/api/email/oauth/google/callback"  # else derived from the Host header, always plain http
+        )
+        state = make_oauth_state(account_id, owner)  # built from account id + owner; checked by verify_oauth_state in the callback
+        params = urllib.parse.urlencode({
+            "client_id": client_id,
+            "redirect_uri": redirect_uri,
+            "response_type": "code",
+            "scope": "https://mail.google.com/ email",
+            "access_type": "offline",  # request a refresh token along with the access token
+            "prompt": "consent",
+            "state": state,
+        })
+        from fastapi.responses import RedirectResponse as _RR
+        return _RR(f"https://accounts.google.com/o/oauth2/v2/auth?{params}")  # send the browser to Google's authorization endpoint
+
+    @router.get("/oauth/google/callback")
+    async def google_oauth_callback(
+        code: str = Query(None),
+        state: str = Query(None),
+        error: str = Query(None),
+        request: Request = None,
+    ):
+        """Handle Google's OAuth redirect: exchange the code for tokens and store them on the account."""
+        from fastapi.responses import RedirectResponse as _RR
+        if error:
+            return _RR("/?section=integrations&email_oauth_error=google_error")
+        if not code or not state:
+            return _RR("/?section=integrations&email_oauth_error=missing_code")
+        state_data = verify_oauth_state(state)
+        if not state_data:
+            return _RR("/?section=integrations&email_oauth_error=invalid_state")
+        account_id = state_data.get("a", "")
+        owner = state_data.get("o", "")
+        client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
+        client_secret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
+        redirect_uri = (
+            os.environ.get("GOOGLE_OAUTH_REDIRECT_URI")
+            or f"http://{request.headers.get('host', 'localhost:7000')}/api/email/oauth/google/callback"
+        )
+        import httpx as _httpx
+        try:
+            resp = _httpx.post("https://oauth2.googleapis.com/token", data={  # NOTE(review): synchronous call inside an async handler
+                "code": code,
+                "client_id": client_id,
+                "client_secret": client_secret,
+                "redirect_uri": redirect_uri,
+                "grant_type": "authorization_code",
+            }, timeout=10)
+            resp.raise_for_status()
+            data = resp.json()
+        except Exception:
+            logger.warning("Google token exchange failed")
+            return _RR("/?section=integrations&email_oauth_error=token_exchange_failed")
+        access_token = data.get("access_token", "")
+        refresh_token = data.get("refresh_token", "")
+        expiry = str(int(time.time()) + int(data.get("expires_in") or 3600))  # tolerate a null or string expires_in
+        # Fetch the email address from userinfo so we can auto-fill imap_user.
+        email_addr = ""
+        display_name = ""
+        try:
+            ui = _httpx.get("https://www.googleapis.com/oauth2/v1/userinfo",
+                            headers={"Authorization": f"Bearer {access_token}"}, timeout=10)
+            if ui.is_success:
+                ui_data = ui.json()
+                email_addr = ui_data.get("email", "")
+                display_name = ui_data.get("name", "")
+        except Exception:
+            logger.debug("Google userinfo lookup failed; continuing without email/display name", exc_info=True)
+        from core.database import SessionLocal, EmailAccount
+        from src.secret_storage import encrypt as _enc
+        db = SessionLocal()
+        try:
+            row = db.query(EmailAccount).filter(EmailAccount.id == account_id).first()
+            if not row:
+                return _RR("/?section=integrations&email_oauth_error=account_not_found")
+            # SECURITY: verify the account belongs to the initiating user.
+            if owner and row.owner and row.owner != owner:
+                logger.warning("OAuth callback owner mismatch — rejecting token write")
+                return _RR("/?section=integrations&email_oauth_error=ownership_error")
+            row.oauth_provider = "google"
+            row.oauth_access_token = _enc(access_token)
+            if refresh_token:
+                row.oauth_refresh_token = _enc(refresh_token)
+            row.oauth_token_expiry = expiry
+            # Auto-fill Google IMAP/SMTP settings if not already configured.
+            if not row.imap_host:
+                row.imap_host = "imap.gmail.com"
+                row.imap_port = 993
+                row.imap_starttls = False
+            if not row.smtp_host:
+                row.smtp_host = "smtp.gmail.com"
+                row.smtp_port = 587
+            if email_addr:
+                if not row.imap_user:
+                    row.imap_user = email_addr
+                if not row.smtp_user:
+                    row.smtp_user = email_addr
+                if not row.from_address:
+                    row.from_address = email_addr
+                if not row.name or row.name == row.id:
+                    row.name = email_addr
+            if display_name and not row.display_name:
+                row.display_name = display_name
+            db.commit()
+        finally:
+            db.close()
+        return _RR("/?section=integrations&email_oauth_success=1")
+
    return router
@@ -7,12 +7,12 @@ import logging
 import asyncio
 from pathlib import Path
 from fastapi import APIRouter, HTTPException, Form, Depends
-from core.constants import BASE_DIR
+from core.constants import EMBEDDING_ENDPOINT_FILE, FASTEMBED_CACHE_DIR
 from core.middleware import require_admin

 logger = logging.getLogger(__name__)

-_ENDPOINT_FILE = os.path.join(BASE_DIR, "data", "embedding_endpoint.json")
+_ENDPOINT_FILE = EMBEDDING_ENDPOINT_FILE

 # Track in-progress downloads
 _downloading: dict = {}
@@ -35,13 +35,7 @@ def _cache_dir() -> str:
    default lived in /tmp, which many systems wipe on reboot — forcing a
    full re-download of the embedding model after every restart.
    """
-    env = os.environ.get("FASTEMBED_CACHE_PATH")
-    if env:
-        return env
-    return os.path.join(
-        os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
-        "data", "fastembed_cache",
-    )
+    return FASTEMBED_CACHE_DIR


 def _model_cache_name(hf_source: str) -> str:
@@ -49,19 +43,35 @@ def _model_cache_name(hf_source: str) -> str:
    return "models--" + hf_source.replace("/", "--")


+def _model_cache_path(hf_source: str) -> Path:
+    """Return the cache path for *hf_source*; raise ValueError if it is a symlink or resolves outside the cache root."""
+    root = Path(_cache_dir()).expanduser().resolve()
+    raw_path = root / _model_cache_name(hf_source)
+    if raw_path.is_symlink():  # checked before resolve(), which would follow the link
+        raise ValueError("Model cache path must not be a symlink")
+    path = raw_path.resolve(strict=False)  # strict=False: the directory does not have to exist
+    try:
+        path.relative_to(root)  # raises ValueError when path is not under root
+    except ValueError:
+        raise ValueError("Model cache path escapes cache root")
+    return path
+
+
 def _is_downloaded(hf_source: str) -> bool:
    """Check if a model is already cached."""
-    cache = _cache_dir()
-    model_dir = os.path.join(cache, _model_cache_name(hf_source))
-    if not os.path.isdir(model_dir):
+    try:
+        model_dir = _model_cache_path(hf_source)
+    except ValueError:
+        return False
+    if not model_dir.is_dir():
        return False
    # Check for actual model files (not just empty dir)
-    snapshots = os.path.join(model_dir, "snapshots")
-    if os.path.isdir(snapshots):
-        return any(os.listdir(snapshots))
+    snapshots = model_dir / "snapshots"
+    if snapshots.is_dir():
+        return any(snapshots.iterdir())
    # Also check for blobs (older cache format)
-    blobs = os.path.join(model_dir, "blobs")
-    return os.path.isdir(blobs) and any(os.listdir(blobs))
+    blobs = model_dir / "blobs"
+    return blobs.is_dir() and any(blobs.iterdir())


 def _active_model() -> str:
@@ -119,8 +129,10 @@ def setup_embedding_routes():

            cached_size = None
            if downloaded and hf_src:
-                model_path = os.path.join(_cache_dir(), _model_cache_name(hf_src))
-                cached_size = _dir_size_mb(model_path)
+                try:
+                    cached_size = _dir_size_mb(str(_model_cache_path(hf_src)))
+                except ValueError:
+                    cached_size = None

            result.append({
                "model": m["model"],
@@ -217,8 +229,11 @@ def setup_embedding_routes():
        if not hf_src:
            raise HTTPException(400, "No cache source for this model")

-        model_path = os.path.join(_cache_dir(), _model_cache_name(hf_src))
-        if not os.path.isdir(model_path):
+        try:
+            model_path = _model_cache_path(hf_src)
+        except ValueError as e:
+            raise HTTPException(400, str(e))
+        if not model_path.is_dir():
            return {"deleted": False, "message": "Model not cached"}

        shutil.rmtree(model_path)
@@ -237,7 +252,7 @@ def setup_embedding_routes():
        }

    @router.post("/endpoint")
-    def set_endpoint(url: str = Form(...), model: str = Form("")):
+    def set_endpoint(url: str = Form(...), model: str = Form(""), api_key: str = Form("")):
        """Save a custom embedding endpoint URL."""
        url = url.strip()
        if not url:
@@ -261,6 +276,7 @@ def setup_embedding_routes():
            resp = httpx.post(
                url,
                json={"input": ["test"], "model": model or "test"},
+                headers={"Authorization": f"Bearer {api_key}"} if api_key else {},
                timeout=10,
            )
            resp.raise_for_status()
@@ -271,10 +287,16 @@ def setup_embedding_routes():
        data = {"url": url}
        if model:
            data["model"] = model
+        if api_key:
+            from src.secret_storage import encrypt
+            data["api_key"] = encrypt(api_key)
+
        _save_custom_endpoint(data)
        os.environ["EMBEDDING_URL"] = url
        if model:
            os.environ["EMBEDDING_MODEL"] = model
+        if api_key:
+            os.environ["EMBEDDING_API_KEY"] = api_key

        # Reset the RAG singleton so it picks up the new endpoint
        import src.rag_singleton as _rs
@@ -288,6 +310,16 @@ def setup_embedding_routes():
            reset_http_embed_state()
        except Exception:
            pass
+        try:
+            from src.embedding_lanes import reset_embedding_lane_state
+            reset_embedding_lane_state()
+        except Exception:
+            pass
+        try:
+            from src.tool_index import reset_tool_index
+            reset_tool_index()
+        except Exception:
+            pass

        # Reset ChromaDB client (collections will be recreated with new embeddings)
        try:
@@ -308,6 +340,7 @@ def setup_embedding_routes():
        # Remove from environment
        os.environ.pop("EMBEDDING_URL", None)
        os.environ.pop("EMBEDDING_MODEL", None)
+        os.environ.pop("EMBEDDING_API_KEY", None)

        # Reset the RAG singleton so it falls back to fastembed
        import src.rag_singleton as _rs
@@ -318,6 +351,16 @@ def setup_embedding_routes():
            reset_http_embed_state()
        except Exception:
            pass
+        try:
+            from src.embedding_lanes import reset_embedding_lane_state
+            reset_embedding_lane_state()
+        except Exception:
+            pass
+        try:
+            from src.tool_index import reset_tool_index
+            reset_tool_index()
+        except Exception:
+            pass

        # Reset ChromaDB client
        try:
@@ -16,22 +16,54 @@ from pathlib import Path

 import httpx
 from fastapi import APIRouter
-from fastapi.responses import FileResponse, Response
+from fastapi.responses import Response
+
+from src.constants import EMOJI_CACHE_DIR

 logger = logging.getLogger(__name__)

-_CACHE_DIR = Path(__file__).resolve().parent.parent / "data" / "emoji_cache"
+_CACHE_DIR = Path(EMOJI_CACHE_DIR)
 # OpenMoji "black" set = monochrome line-art SVGs. Filenames are the codepoints
 # in UPPERCASE (FE0F dropped, same as we compute), '-' joined.
 _OPENMOJI_BASE = "https://cdn.jsdelivr.net/npm/openmoji@15.0.0/black/svg"
 # codepoints like "1f600" or "1f468-200d-1f469-200d-1f467" (lowercase hex, '-' joined)
 _CODE_RE = re.compile(r"^[0-9a-f]{2,6}(?:-[0-9a-f]{2,6})*$")
-_SVG_HEADERS = {"Cache-Control": "public, max-age=31536000, immutable"}
+_MAX_SVG_BYTES = 256 * 1024
+_BLOCKED_SVG_RE = re.compile(
+    br"<\s*(?:script|foreignObject|iframe|object|embed|image)\b|"
+    br"\bon[a-z0-9_-]+\s*=",
+    re.IGNORECASE,
+)
+_EXTERNAL_REF_RE = re.compile(
+    br"\b(?:href|xlink:href)\s*=\s*['\"](?:https?:|//|data:|javascript:)",
+    re.IGNORECASE,
+)
+_SVG_SECURITY_HEADERS = {
+    "X-Content-Type-Options": "nosniff",
+    "Content-Security-Policy": "sandbox",
+    "Cross-Origin-Resource-Policy": "same-origin",
+}
+_SVG_HEADERS = {
+    "Cache-Control": "public, max-age=31536000, immutable",
+    **_SVG_SECURITY_HEADERS,
+}
 # Returned when a codepoint is unknown/unreachable: an empty (transparent) SVG,
 # so the CSS mask renders nothing instead of a solid box. Not cached, so a later
 # request can still pick up the real glyph once the CDN is reachable.
 _BLANK_SVG = b'<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1 1"></svg>'
-_BLANK_HEADERS = {"Cache-Control": "no-store"}
+_BLANK_HEADERS = {"Cache-Control": "no-store", **_SVG_SECURITY_HEADERS}
+
+
+def _is_safe_svg(content: bytes) -> bool:  # True only when content passes every check below
+    if not isinstance(content, bytes) or not content:  # reject non-bytes and empty payloads
+        return False
+    if len(content) > _MAX_SVG_BYTES:  # size cap
+        return False
+    if b"<svg" not in content[:256].lower():  # an "<svg" tag must appear within the first 256 bytes
+        return False
+    if _BLOCKED_SVG_RE.search(content) or _EXTERNAL_REF_RE.search(content):  # blocked elements, on*= attributes, or http(s)/data/javascript hrefs
+        return False
+    return True


 def setup_emoji_routes() -> APIRouter:
@@ -49,14 +81,21 @@ def setup_emoji_routes() -> APIRouter:
        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
        fp = _CACHE_DIR / f"{code}.svg"
        if fp.exists():
-            return FileResponse(fp, media_type="image/svg+xml", headers=_SVG_HEADERS)
+            try:
+                content = fp.read_bytes()
+                if _is_safe_svg(content):
+                    return Response(content, media_type="image/svg+xml", headers=_SVG_HEADERS)
+                fp.unlink(missing_ok=True)
+            except Exception as e:
+                logger.warning("emoji cache read %s failed: %s", code, e)
+            return _blank()

        # First time we've seen this emoji — fetch the OpenMoji black SVG + cache
        # it. OpenMoji filenames are the codepoints uppercased.
        try:
            async with httpx.AsyncClient(timeout=8.0) as client:
                r = await client.get(f"{_OPENMOJI_BASE}/{code.upper()}.svg")
-            if r.status_code == 200 and b"<svg" in r.content[:256]:
+            if r.status_code == 200 and _is_safe_svg(r.content):
                try:
                    fp.write_bytes(r.content)
                except Exception:
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional
 from pydantic import BaseModel

 from core.database import GalleryImage
+from src.auth_helpers import _auth_disabled

 logger = logging.getLogger(__name__)

@@ -120,19 +121,18 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any
    }


-def _owner_filter(q, user):
+def _owner_filter(q, user, model_cls=GalleryImage):
    """Apply owner filtering to a gallery query.

-    When auth is disabled (single-user mode) get_current_user returns None
-    and there is no per-user scoping. The main library list and stats already
-    treat None as "show everything" (`if user is not None`), so this helper
-    must too — otherwise the tag/model filter sidebars come back empty and the
-    tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
-    silently affect zero rows in the most common self-hosted deployment.
+    ``get_current_user`` returns None both in auth-disabled single-user mode
+    and when auth is enabled but no current user was resolved. Preserve the
+    single-user behavior, but fail closed for auth-enabled null-user states.
    """
-    if user is None:
+    if user is not None:
+        return q.filter(model_cls.owner == user)
+    if _auth_disabled():
        return q
-    return q.filter(GalleryImage.owner == user)
+    return q.filter(False)



@@ -12,8 +12,14 @@ from fastapi import APIRouter, HTTPException, Query, Request

 from core.database import SessionLocal, GalleryImage, GalleryAlbum, ModelEndpoint
 from core.database import Session as DbSession
-from src.auth_helpers import get_current_user, require_privilege
-from src.upload_limits import read_upload_limited
+from src.auth_helpers import get_current_user, owner_filter, require_privilege
+from src.upload_limits import (
+    read_upload_limited,
+    GALLERY_UPLOAD_MAX_BYTES,
+    GALLERY_TRANSFORM_UPLOAD_MAX_BYTES,
+)
+from src.constants import GENERATED_IMAGES_DIR
+from src.optional_deps import patch_realesrgan_torchvision_compat

 from routes.gallery_helpers import (
    GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
@@ -21,17 +27,122 @@ from routes.gallery_helpers import (

 logger = logging.getLogger(__name__)

-GALLERY_UPLOAD_MAX_BYTES = int(os.getenv("ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", str(100 * 1024 * 1024)))
-GALLERY_TRANSFORM_UPLOAD_MAX_BYTES = int(os.getenv("ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024)))
+
+def _current_user_is_admin(request: Request, user: str | None) -> bool:  # every uncertain path returns False
+    if not user:  # no resolved user is never treated as admin
+        return False
+    auth_mgr = getattr(request.app.state, "auth_manager", None)
+    is_admin = getattr(auth_mgr, "is_admin", None)  # also None when auth_mgr itself is None
+    if not callable(is_admin):
+        return False
+    try:
+        return bool(is_admin(user))
+    except Exception:  # a failing admin lookup counts as "not admin"
+        return False


 def _sanitize_gallery_filename(filename: str) -> str:
    """Return a local filename safe to join under generated_images."""
-    safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", Path(filename or "").name)[:128]
+    safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", Path(str(filename or "")).name)[:128]
    if not safe_name or safe_name in {".", ".."}:
        safe_name = uuid.uuid4().hex[:12]
    return safe_name

+
+GALLERY_IMAGE_DIR = Path(GENERATED_IMAGES_DIR)
+
+
+def _gallery_image_path(filename: str) -> Path:
+    """Resolve a stored gallery filename inside the gallery image dir; raise HTTPException(400) for unsafe names."""
+    if not isinstance(filename, str):
+        raise HTTPException(400, "Unsafe gallery filename")
+    safe_name = _sanitize_gallery_filename(filename)
+    original = str(filename or "")
+    root = GALLERY_IMAGE_DIR.resolve()
+    path = (GALLERY_IMAGE_DIR / safe_name).resolve()
+    try:
+        if os.path.commonpath([str(root), str(path)]) != str(root):  # resolved path must stay under root
+            raise ValueError
+    except Exception:  # covers the ValueError above and any commonpath failure
+        raise HTTPException(400, "Unsafe gallery filename")
+    if safe_name != original:  # a name that sanitizing would alter is rejected, not silently rewritten
+        raise HTTPException(400, "Unsafe gallery filename")
+    if not path.exists():  # fall back to data/generated_images under the current working directory
+        cwd_root = (Path.cwd() / "data" / "generated_images").resolve()
+        cwd_path = (cwd_root / safe_name).resolve()
+        try:
+            if os.path.commonpath([str(cwd_root), str(cwd_path)]) == str(cwd_root) and cwd_path.exists():
+                return cwd_path
+        except Exception:  # best-effort fallback: on any error use the primary path
+            pass
+    return path  # returned even when the file does not exist
+
+
+def _normalize_image_endpoint_base(url: str) -> str:  # form used when comparing endpoint base URLs
+    base = (url or "").strip().rstrip("/")  # None-safe; drops surrounding whitespace and trailing slashes
+    if base.endswith("/v1"):
+        base = base[:-3].rstrip("/")  # drop the 3-char "/v1" suffix and any slash left before it
+    return base
+
+
+def _visible_image_endpoint_query(db, owner: str | None):  # query of enabled image endpoints, passed through owner_filter
+    from src.auth_helpers import owner_filter
+    q = db.query(ModelEndpoint).filter(
+        ModelEndpoint.model_type == "image",
+        ModelEndpoint.is_enabled == True,  # noqa: E712
+    )
+    return owner_filter(q, ModelEndpoint, owner)  # NOTE(review): exact visibility rules live in src.auth_helpers — confirm there
+
+
+def _first_visible_image_endpoint(db, owner: str | None):  # returns an endpoint row or None
+    endpoints = _visible_image_endpoint_query(db, owner).all()
+    if owner:
+        for ep in endpoints:
+            if getattr(ep, "owner", None) == owner:  # prefer an endpoint owned by the caller
+                return ep
+    return endpoints[0] if endpoints else None  # otherwise the first visible one, if any
+
+
+def _visible_image_endpoint_for_base(db, base: str, owner: str | None):  # visible endpoint whose base_url matches `base`, or None
+    target = _normalize_image_endpoint_base(base)
+    if not target:  # an empty/blank base never matches
+        return None
+    fallback = None
+    for ep in _visible_image_endpoint_query(db, owner).all():
+        if _normalize_image_endpoint_base(getattr(ep, "base_url", "")) == target:  # compare normalized forms on both sides
+            if owner and getattr(ep, "owner", None) == owner:  # a caller-owned match wins immediately
+                return ep
+            if fallback is None:
+                fallback = ep  # remember the first other match
+    return fallback
+
+
+async def _fetch_result_image_b64(url: str) -> Optional[str]:
+    """Fetch an image URL taken from an upstream response and return it
+    base64-encoded, or None when the fetch does not return HTTP 200.
+
+    The URL comes from the diffusion/OpenAI server's response, not from our own
+    config, so a malicious or compromised endpoint could otherwise steer this
+    fetch at an internal or cloud-metadata address. It is therefore checked with
+    check_outbound_url first; an unsafe URL raises HTTPException(502).
+    """
+    import base64
+    import httpx
+    from src.url_safety import check_outbound_url
+
+    ok, reason = check_outbound_url(
+        url,
+        block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",  # opt-in: only the value "true" enables it
+    )
+    if not ok:
+        raise HTTPException(502, f"Upstream returned an unsafe image URL: {reason}")
+    async with httpx.AsyncClient(timeout=60) as c2:
+        ir = await c2.get(url)
+        if ir.status_code == 200:
+            return base64.b64encode(ir.content).decode()
+    return None
+
+
 def setup_gallery_routes() -> APIRouter:
    router = APIRouter(tags=["gallery"])

@@ -55,6 +166,9 @@ def setup_gallery_routes() -> APIRouter:
        file_hash = hashlib.sha256(content).hexdigest()
        db = SessionLocal()
        try:
+            if album_id and user is not None:
+                _get_or_404_album(db, album_id, user)
+
            # SECURITY: scope the dup-detect to THIS user — otherwise a
            # caller can probe whether someone else uploaded the same
            # file (the response leaks the existing row's id+filename).
@@ -69,7 +183,7 @@ def setup_gallery_routes() -> APIRouter:
                return {"ok": False, "duplicate": True, "filename": existing.filename,
                        "id": existing.id, "message": "Duplicate photo skipped"}

-            img_dir = Path("data/generated_images")
+            img_dir = Path(GENERATED_IMAGES_DIR)
            img_dir.mkdir(parents=True, exist_ok=True)

            ext = file.filename.rsplit(".", 1)[-1].lower() if "." in file.filename else "png"
@@ -118,8 +232,6 @@ def setup_gallery_routes() -> APIRouter:
    @router.post("/api/gallery/{image_id}/replace")
    async def gallery_replace(request: Request, image_id: str):
        """Replace an existing gallery image file with a new one."""
-        from pathlib import Path
-
        user = get_current_user(request)
        db = SessionLocal()
        try:
@@ -135,9 +247,8 @@ def setup_gallery_routes() -> APIRouter:
                raise HTTPException(400, "No image provided")

            content = await read_upload_limited(file, GALLERY_UPLOAD_MAX_BYTES, "Gallery replacement")
-            img_dir = Path("data/generated_images")
-            img_dir.mkdir(parents=True, exist_ok=True)
-            img_path = img_dir / _sanitize_gallery_filename(img.filename)
+            GALLERY_IMAGE_DIR.mkdir(parents=True, exist_ok=True)
+            img_path = _gallery_image_path(img.filename)
            img_path.write_bytes(content)

            # Refresh dimensions in case the editor resized the canvas.
@@ -211,7 +322,7 @@ def setup_gallery_routes() -> APIRouter:
            if not user or img.owner != user:
                raise HTTPException(403, "Not your image")

-            img_path = Path("data/generated_images") / img.filename
+            img_path = _gallery_image_path(img.filename)
            if not img_path.exists():
                raise HTTPException(404, "Image file not found")

@@ -248,7 +359,7 @@ def setup_gallery_routes() -> APIRouter:
        """AI upscale using img2img with the diffusion server."""
        import base64, httpx

-        require_privilege(request, "can_generate_images")
+        user = require_privilege(request, "can_generate_images")
        form = await request.form()
        file = form.get("image")
        if not file: raise HTTPException(400, "No image")
@@ -260,7 +371,7 @@ def setup_gallery_routes() -> APIRouter:
        # Find image endpoint
        db = SessionLocal()
        try:
-            ep = db.query(ModelEndpoint).filter(ModelEndpoint.model_type == "image", ModelEndpoint.is_enabled == True).first()
+            ep = _first_visible_image_endpoint(db, user)
        finally:
            db.close()

@@ -291,7 +402,7 @@ def setup_gallery_routes() -> APIRouter:
        """Style transfer using img2img with the diffusion server."""
        import base64, httpx

-        require_privilege(request, "can_generate_images")
+        user = require_privilege(request, "can_generate_images")
        form = await request.form()
        file = form.get("image")
        prompt = form.get("prompt", "")
@@ -303,7 +414,7 @@ def setup_gallery_routes() -> APIRouter:

        db = SessionLocal()
        try:
-            ep = db.query(ModelEndpoint).filter(ModelEndpoint.model_type == "image", ModelEndpoint.is_enabled == True).first()
+            ep = _first_visible_image_endpoint(db, user)
        finally:
            db.close()

@@ -397,8 +508,7 @@ def setup_gallery_routes() -> APIRouter:
                .outerjoin(DbSession, GalleryImage.session_id == DbSession.id)
                .filter(GalleryImage.is_active == True)
            )
-            if user is not None:
-                q = q.filter(GalleryImage.owner == user)
+            q = _owner_filter(q, user)

            # Search filter (prompt + tags + ai_tags)
            if search:
@@ -500,23 +610,27 @@ def setup_gallery_routes() -> APIRouter:
        db = SessionLocal()
        try:
            q = db.query(GalleryAlbum)
-            if user:
-                q = q.filter(GalleryAlbum.owner == user)
+            q = _owner_filter(q, user, GalleryAlbum)
            albums = q.order_by(GalleryAlbum.created_at.desc()).all()
            result = []
            for a in albums:
-                count = db.query(GalleryImage).filter(
+                _count_q = db.query(GalleryImage).filter(
                    GalleryImage.album_id == a.id, GalleryImage.is_active == True
-                ).count()
+                )
+                _count_q = _owner_filter(_count_q, user)
+                count = _count_q.count()
                cover_url = None
                if a.cover_id:
-                    cover = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id).first()
+                    cover_q = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id)
+                    cover = _owner_filter(cover_q, user).first()
                    if cover:
                        cover_url = f"/api/generated-image/{cover.filename}"
                elif count > 0:
-                    first = db.query(GalleryImage).filter(
+                    _cover_q = db.query(GalleryImage).filter(
                        GalleryImage.album_id == a.id, GalleryImage.is_active == True
-                    ).order_by(GalleryImage.created_at.desc()).first()
+                    )
+                    _cover_q = _owner_filter(_cover_q, user)
+                    first = _cover_q.order_by(GalleryImage.created_at.desc()).first()
                    if first:
                        cover_url = f"/api/generated-image/{first.filename}"
                result.append({
@@ -558,10 +672,9 @@ def setup_gallery_routes() -> APIRouter:
            base = db.query(GalleryImage).filter(GalleryImage.is_active == True)
            size_q = db.query(func.sum(GalleryImage.file_size)).filter(GalleryImage.is_active == True)
            album_q = db.query(GalleryAlbum)
-            if user:
-                base = base.filter(GalleryImage.owner == user)
-                size_q = size_q.filter(GalleryImage.owner == user)
-                album_q = album_q.filter(GalleryAlbum.owner == user)
+            base = _owner_filter(base, user)
+            size_q = _owner_filter(size_q, user)
+            album_q = _owner_filter(album_q, user, GalleryAlbum)
            total = base.count()
            total_size = size_q.scalar() or 0
            fav_count = base.filter(GalleryImage.favorite == True).count()
@@ -589,8 +702,7 @@ def setup_gallery_routes() -> APIRouter:
                GalleryImage.is_active == True,
                (GalleryImage.ai_tags == None) | (GalleryImage.ai_tags == ""),
            )
-            if user:
-                q = q.filter(GalleryImage.owner == user)
+            q = _owner_filter(q, user)
            if album_id:
                q = q.filter(GalleryImage.album_id == album_id)
            untagged = q.count()
@@ -649,7 +761,14 @@ def setup_gallery_routes() -> APIRouter:
            if req.favorite is not None:
                img.favorite = req.favorite
            if req.album_id is not None:
-                img.album_id = req.album_id if req.album_id else None
+                if req.album_id:
+                    # Validate the target album belongs to the caller before
+                    # moving the image into it — mirrors add_to_album, so you
+                    # cannot file your image into another user's album.
+                    _get_or_404_album(db, req.album_id, user)
+                    img.album_id = req.album_id
+                else:
+                    img.album_id = None
            db.commit()
            db.refresh(img)
            return _image_to_dict(img)
@@ -692,11 +811,11 @@ def setup_gallery_routes() -> APIRouter:
            used = set()
            with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
                for img in imgs:
-                    src = os.path.join("data", "generated_images", img.filename)
-                    if not os.path.exists(src):
+                    src = _gallery_image_path(img.filename)
+                    if not src.exists():
                        continue
-                    ext = os.path.splitext(img.filename)[1] or ".png"
-                    base = (img.prompt or "").strip() or os.path.splitext(img.filename)[0]
+                    ext = src.suffix or ".png"
+                    base = (img.prompt or "").strip() or src.stem
                    base = re.sub(r"[^\w\-. ]+", "", base)[:60].strip() or img.id
                    name = f"{base}{ext}"
                    i = 1
@@ -817,15 +936,23 @@ def setup_gallery_routes() -> APIRouter:
                raise HTTPException(404, "Image not found")

            img_filename = img.filename
-            # Remove the file from disk
-            img_path = os.path.join("data", "generated_images", img_filename)
-            if os.path.exists(img_path):
-                os.remove(img_path)
-
-            # Soft-delete the record
+            # Soft-delete the record first; the DB is the source of truth.
            img.is_active = False
            db.commit()

+            # Only after the soft-delete commit succeeds do we remove the file.
+            # If the file were deleted first and the commit then failed/rolled
+            # back, the still-active record would point at a missing file.
+            # Best-effort so a missing or locked file can't 500 a delete that
+            # already succeeded logically. Uses the path-confined resolver so a
+            # malformed stored filename can't escape generated_images.
+            try:
+                img_path = _gallery_image_path(img_filename)
+                if img_path.exists():
+                    img_path.unlink()
+            except Exception:
+                logger.warning("Could not remove gallery image file for %s", img_filename, exc_info=True)
+
            # Strip stale chat-history references so the image bubble
            # (and its prompt caption) doesn't come back after a server
            # reboot replays the session. We remove the matching tool
@@ -923,7 +1050,7 @@ def setup_gallery_routes() -> APIRouter:
        the request for /v1/images/edits (multipart, inverted mask). Otherwise
        proxy through to a self-hosted diffusion server's /v1/images/inpaint."""
        import httpx
-        require_privilege(request, "can_generate_images")
+        user = require_privilege(request, "can_generate_images")
        body = await request.json()
        # Use endpoint from request body (editor dropdown) or fall back to DB lookup
        base = (body.pop("_endpoint", "") or "").rstrip("/")
@@ -942,14 +1069,11 @@ def setup_gallery_routes() -> APIRouter:
        if not base:
            db = SessionLocal()
            try:
-                eps = db.query(ModelEndpoint).filter(
-                    ModelEndpoint.is_enabled == True,
-                    ModelEndpoint.model_type == "image",
-                ).all()
-                if not eps:
+                ep = _first_visible_image_endpoint(db, user)
+                if not ep:
                    raise HTTPException(400, "No image generation endpoint configured. Serve a diffusion model via Cookbook first.")
-                base = eps[0].base_url.rstrip("/")
-                api_key = eps[0].api_key
+                base = ep.base_url.rstrip("/")
+                api_key = ep.api_key
            finally:
                db.close()
        else:
@@ -966,10 +1090,12 @@ def setup_gallery_routes() -> APIRouter:
            _target = _norm_url(base)
            db = SessionLocal()
            try:
-                for ep in db.query(ModelEndpoint).all():
-                    if _norm_url(ep.base_url) == _target:
-                        api_key = ep.api_key
-                        break
+                ep = _visible_image_endpoint_for_base(db, _target, user)
+                if ep:
+                    base = (ep.base_url or base).rstrip("/")
+                    api_key = ep.api_key
+                elif user and not _current_user_is_admin(request, user):
+                    raise HTTPException(403, "Choose a registered image endpoint")
            finally:
                db.close()

@@ -1056,10 +1182,7 @@ def setup_gallery_routes() -> APIRouter:
                        if item.get("b64_json"):
                            raw_b64 = item["b64_json"]
                        elif item.get("url"):
-                            async with httpx.AsyncClient(timeout=60) as c2:
-                                img_r = await c2.get(item["url"])
-                                if img_r.status_code == 200:
-                                    raw_b64 = base64.b64encode(img_r.content).decode()
+                            raw_b64 = await _fetch_result_image_b64(item["url"])
                    if not raw_b64:
                        raise HTTPException(502, "OpenAI returned no image")

@@ -1120,8 +1243,8 @@ def setup_gallery_routes() -> APIRouter:
        original and regenerates `strength` fraction. With strength ~0.4
        you get edge blending + lighting unification while keeping the
        composition recognisable."""
-        import httpx, base64 as _b64
-        require_privilege(request, "can_generate_images")
+        import httpx
+        user = require_privilege(request, "can_generate_images")
        body = await request.json()

        image_b64 = body.get("image")
@@ -1148,23 +1271,22 @@ def setup_gallery_routes() -> APIRouter:
        if not base:
            db = SessionLocal()
            try:
-                eps = db.query(ModelEndpoint).filter(
-                    ModelEndpoint.is_enabled == True,
-                    ModelEndpoint.model_type == "image",
-                ).all()
-                if not eps:
+                ep = _first_visible_image_endpoint(db, user)
+                if not ep:
                    raise HTTPException(400, "No image generation endpoint configured.")
-                base = eps[0].base_url.rstrip("/")
-                api_key = eps[0].api_key
+                base = ep.base_url.rstrip("/")
+                api_key = ep.api_key
            finally:
                db.close()
        else:
            db = SessionLocal()
            try:
-                for ep in db.query(ModelEndpoint).all():
-                    if ep.base_url.rstrip("/").removesuffix("/v1").rstrip("/") == base.rstrip("/").removesuffix("/v1").rstrip("/"):
-                        api_key = ep.api_key
-                        break
+                ep = _visible_image_endpoint_for_base(db, base, user)
+                if ep:
+                    base = (ep.base_url or base).rstrip("/")
+                    api_key = ep.api_key
+                elif user and not _current_user_is_admin(request, user):
+                    raise HTTPException(403, "Choose a registered image endpoint")
            finally:
                db.close()

@@ -1297,10 +1419,9 @@ def setup_gallery_routes() -> APIRouter:
                            if item.get("b64_json"):
                                return {"image": item["b64_json"]}
                            if item.get("url"):
-                                async with httpx.AsyncClient(timeout=60) as c2:
-                                    ir = await c2.get(item["url"])
-                                    if ir.status_code == 200:
-                                        return {"image": _b64.b64encode(ir.content).decode()}
+                                img_b64 = await _fetch_result_image_b64(item["url"])
+                                if img_b64:
+                                    return {"image": img_b64}
                    last_err = f"{path}: server returned no image"
                except httpx.ConnectError as e:
                    raise HTTPException(502, f"Can't reach diffusion server at {base}: {e}")
@@ -1316,6 +1437,7 @@ def setup_gallery_routes() -> APIRouter:
    @router.post("/api/image/sharpen")
    async def sharpen_image(request: Request):
        """Apply unsharp-mask sharpening to an image."""
+        require_privilege(request, "can_generate_images")
        body = await request.json()
        image_b64 = body.get("image")
        amount = body.get("amount", 50) / 100.0
@@ -1359,6 +1481,7 @@ def setup_gallery_routes() -> APIRouter:
        img_bytes = base64.b64decode(image_b64)
        src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
        try:
+            patch_realesrgan_torchvision_compat()
            from realesrgan import RealESRGANer
        except ImportError:
            return {"error": "realesrgan not installed. Install it from Cookbook → Dependencies (search 'realesrgan')."}
@@ -1408,6 +1531,7 @@ def setup_gallery_routes() -> APIRouter:
        img_bytes = base64.b64decode(image_b64)
        src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
        try:
+            patch_realesrgan_torchvision_compat()
            from basicsr.archs.rrdbnet_arch import RRDBNet
            from realesrgan import RealESRGANer
        except ImportError:
@@ -1635,9 +1759,10 @@ def setup_gallery_routes() -> APIRouter:
        db = SessionLocal()
        try:
            album = _get_or_404_album(db, album_id, user)
-            db.query(GalleryImage).filter(GalleryImage.album_id == album_id).update(
-                {"album_id": None}, synchronize_session=False
-            )
+            q = db.query(GalleryImage).filter(GalleryImage.album_id == album_id)
+            if user is not None:
+                q = q.filter(GalleryImage.owner == user)
+            q.update({"album_id": None}, synchronize_session=False)
            db.delete(album)
            db.commit()
            return {"ok": True}
@@ -1708,7 +1833,7 @@ def setup_gallery_routes() -> APIRouter:
        try:
            img = _get_or_404_image(db, image_id, user)

-            img_path = Path("data/generated_images") / img.filename
+            img_path = _gallery_image_path(img.filename)
            if not img_path.exists():
                raise HTTPException(404, "Image file not found")

@@ -1726,7 +1851,7 @@ def setup_gallery_routes() -> APIRouter:
                return {"error": "Vision is disabled — enable it in Settings → Vision"}
            configured = vl_settings.get("vision_model", "")
            try:
-                chat_url, model_name, headers = _resolve_vl_model(configured)
+                chat_url, model_name, headers = _resolve_vl_model(configured, owner=user)
            except ValueError:
                return {"error": "No vision model configured — set one in Settings → Vision"}
            if not chat_url:
@@ -1807,4 +1932,3 @@ def setup_gallery_routes() -> APIRouter:
            db.close()

    return router
-
@@ -490,7 +490,13 @@ def setup_history_routes(session_manager) -> APIRouter:
            # Copy messages up to keep_count
            msgs_to_copy = source.history[:keep_count]
            for msg in msgs_to_copy:
-                new_session.add_message(ChatMessage(msg.role, msg.content, msg.metadata))
+                # Copy the metadata dict. Sharing it would let the fork's
+                # persistence (add_message -> _persist_message stamps
+                # _db_id/timestamp onto the dict) mutate the SOURCE session's
+                # in-memory messages, corrupting their _db_id and breaking
+                # edit/delete-by-id on the original conversation.
+                meta = dict(msg.metadata) if isinstance(msg.metadata, dict) else None
+                new_session.add_message(ChatMessage(msg.role, msg.content, meta))
            try:
                from src.event_bus import fire_event
                fire_event("session_created", getattr(source, 'owner', None))
@@ -522,6 +528,8 @@ def setup_history_routes(session_manager) -> APIRouter:
    async def compact_session(request: Request, session_id: str):
        """Manually trigger context compaction for a session."""
        _verify_session_owner(request, session_id)
+        from src.auth_helpers import effective_user
+        owner = effective_user(request)
        try:
            session = session_manager.get_session(session_id)
        except KeyError:
@@ -555,7 +563,7 @@ def setup_history_routes(session_manager) -> APIRouter:
            )

            # Use utility model if available
-            util_url, util_model, util_headers = resolve_endpoint("utility")
+            util_url, util_model, util_headers = resolve_endpoint("utility", owner=owner or None)
            compact_url = util_url or session.endpoint_url
            compact_model = util_model or session.model
            compact_headers = util_headers if util_url else session.headers
@@ -1,7 +1,9 @@
 import re
 from copy import deepcopy

-from fastapi import APIRouter
+from fastapi import APIRouter, HTTPException
+
+from routes._validators import validate_remote_host, validate_ssh_port


 # Backends the manual hardware simulator accepts. Must stay a subset of what
@@ -11,6 +13,14 @@ from fastapi import APIRouter
 _MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}


+def _validate_detection_target(host: str = "", ssh_port: str = "") -> tuple[str, str]:
+    """Run host/ssh_port through the shared validators ("" when empty); 400 if a port has no host."""
+    host_value, port_value = validate_remote_host(host) or "", validate_ssh_port(ssh_port) or ""
+    if port_value and not host_value:
+        raise HTTPException(400, "ssh_port requires host")
+    return host_value, port_value
+
+
 def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
    """Manual hardware is a "what if I had this setup" simulator —
    REPLACES the detected hardware entirely instead of adding to it.
@@ -105,10 +115,11 @@ def setup_hwfit_routes():
        """Detect and return current system hardware info. Pass host=user@server for remote.
        fresh=true bypasses the per-host cache (the Rescan button)."""
        from services.hwfit.hardware import detect_system
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        return detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)

    @router.get("/models")
-    def get_models(use_case: str = "", sort: str = "score", limit: int = 50, search: str = "", host: str = "", quant: str = "", ctx: str = "", gpu_count: str = "", gpu_group: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False, fit_only: bool = False):
+    def get_models(use_case: str = "", sort: str = "newest", limit: int = 50, search: str = "", host: str = "", quant: str = "", ctx: str = "", gpu_count: str = "", gpu_group: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False, fit_only: bool = False):
        """Rank LLM models against detected hardware and return scored results.
        gpu_count: override GPU count (0 = CPU only, 1-N = simulate N GPUs of the
            active group). gpu_group: index into system.gpu_groups (the homogeneous
@@ -118,6 +129,7 @@ def setup_hwfit_routes():
        from services.hwfit.hardware import detect_system
        from services.hwfit.fit import rank_models
        from services.hwfit.models import get_models, model_catalog_path
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
        if system.get("error"):
            return {"system": system, "models": [], "error": system["error"]}
@@ -165,8 +177,14 @@ def setup_hwfit_routes():
            system["gpu_name"] = g["name"]
            system["active_group"] = {**g, "use_count": n}

-        if gpu_count != "":
-            n = int(gpu_count)
+        # Parse the optional count defensively (matches the gpu_group guard
+        # above): a non-numeric query param previously raised ValueError ->
+        # HTTP 500. A malformed value is ignored, same as omitting it.
+        try:
+            n = int(gpu_count) if gpu_count != "" else None
+        except ValueError:
+            n = None
+        if n is not None:
            if n == 0:
                # RAM-only mode: rank against system memory, offload allowed.
                system["has_gpu"] = False
@@ -196,7 +214,24 @@ def setup_hwfit_routes():
        if target_context is not None:
            target_context = max(1024, min(target_context, 1000000))

-        results = rank_models(system, use_case=use_case or None, limit=limit, search=search or None, sort=sort, quant=quant or None, target_context=target_context, fit_only=fit_only)
+        rank_kwargs = {
+            "use_case": use_case or None,
+            "limit": limit,
+            "search": search or None,
+            "sort": sort,
+            "quant": quant or None,
+            "fit_only": fit_only,
+        }
+        if target_context is not None:
+            rank_kwargs["target_context"] = target_context
+        try:
+            import inspect
+            supported = set(inspect.signature(rank_models).parameters)
+            rank_kwargs = {k: v for k, v in rank_kwargs.items() if k in supported}
+        except Exception:
+            rank_kwargs.pop("target_context", None)
+            rank_kwargs.pop("fit_only", None)
+        results = rank_models(system, **rank_kwargs)
        return {"system": system, "models": results}

    @router.get("/profiles")
@@ -212,6 +247,7 @@ def setup_hwfit_routes():
        from services.hwfit.hardware import detect_system
        from services.hwfit.models import get_models
        from services.hwfit.profiles import compute_serve_profiles
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
        if system.get("error"):
            return {"system": system, "profiles": [], "error": system["error"]}
@@ -262,6 +298,7 @@ def setup_hwfit_routes():
        """Rank image generation models against detected hardware."""
        from services.hwfit.hardware import detect_system
        from services.hwfit.image_models import rank_image_models
+        host, ssh_port = _validate_detection_target(host, ssh_port)
        system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
        if system.get("error"):
            return {"system": system, "models": [], "error": system["error"]}
@@ -13,7 +13,7 @@ import httpx

 from core.database import McpServer, SessionLocal
 from core.middleware import require_admin
-from src.constants import DATA_DIR
+from src.constants import DATA_DIR, MCP_OAUTH_DIR
 from src.mcp_manager import McpManager

 logger = logging.getLogger(__name__)
@@ -23,7 +23,7 @@ router = APIRouter(prefix="/api/mcp", tags=["mcp"])

 def _mcp_oauth_base_dir() -> Path:
    """Directory that may contain OAuth files managed by Odysseus."""
-    return (Path(DATA_DIR) / "mcp_oauth").resolve(strict=False)
+    return Path(MCP_OAUTH_DIR).resolve(strict=False)


 def _resolve_mcp_oauth_path(raw_path, field_name: str) -> str:
@@ -108,6 +108,12 @@ def _load_disabled_map():
        db.close()


+def _mcp_oauth_redirect_uri() -> str:
+    """Shared callback URL for legacy Google and generic MCP OAuth flows."""
+    from src.mcp_oauth import REDIRECT_URI
+    return REDIRECT_URI
+
+
 def setup_mcp_routes(mcp_manager: McpManager):
    """Setup MCP routes with the provided manager."""

@@ -445,9 +451,9 @@ def setup_mcp_routes(mcp_manager: McpManager):
            client_id = keys["client_id"]
            scopes = oauth_cfg.get("scopes", [])

-            # For Desktop App creds, redirect to localhost — the user will
+            # For Desktop App creds, default to localhost — the user will
            # paste the resulting URL back if they're on a different device.
-            redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
+            redirect_uri = _mcp_oauth_redirect_uri()

            params = {
                "client_id": client_id,
@@ -469,7 +475,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
                return RedirectResponse(auth_url)
            else:
                # Remote device — show paste-back page
-                return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host))
+                return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host, redirect_uri))
        finally:
            db.close()

@@ -536,7 +542,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
            client_id = keys["client_id"]
            client_secret = keys["client_secret"]

-            redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
+            redirect_uri = _mcp_oauth_redirect_uri()

            async with httpx.AsyncClient() as client:
                resp = await client.post(
@@ -603,13 +609,19 @@ def setup_mcp_routes(mcp_manager: McpManager):
    return router


-def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
+def _oauth_authorize_page(
+    auth_url: str,
+    server_id: str,
+    host: str,
+    redirect_uri: str = "http://localhost:7000/api/mcp/oauth/callback",
+) -> str:
    """Page with Google sign-in link and URL paste-back form for remote access."""
    # Escape values interpolated into the page: `host` comes from the request
    # Host header and `server_id` from the OAuth state — neither is trusted.
    auth_url = html.escape(auth_url, quote=True)
    server_id = html.escape(server_id, quote=True)
    host = html.escape(host, quote=True)
+    redirect_uri = html.escape(redirect_uri, quote=True)
    return f"""<!DOCTYPE html>
 <html><head>
 <meta charset="UTF-8"><title>Authorize — Odysseus</title>
@@ -654,7 +666,7 @@ def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
  <div class="divider"></div>
  <form method="POST" action="http://{host}/api/mcp/oauth/exchange/{server_id}">
    <p>Paste the URL from your browser after signing in:</p>
-    <input type="text" name="callback_url" placeholder="http://localhost:7000/api/mcp/oauth/callback?code=..." required>
+    <input type="text" name="callback_url" placeholder="{redirect_uri}?code=..." required>
    <br><button type="submit">Connect</button>
  </form>
 </div></body></html>"""
@@ -29,11 +29,11 @@ from src.llm_core import llm_call_async
 from services.memory.memory_extractor import audit_memories
 from src.auth_helpers import get_current_user, require_user
 from src.endpoint_resolver import resolve_endpoint
-from src.upload_limits import read_upload_limited
+from src.task_endpoint import resolve_task_endpoint
+from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES

 logger = logging.getLogger(__name__)

-MEMORY_IMPORT_MAX_BYTES = int(os.getenv("ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", str(10 * 1024 * 1024)))

 def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionManager, memory_vector=None):
    """Set up memory-related routes."""
@@ -106,6 +106,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        if memory_manager.find_duplicates(text, user_mem):
            return {"ok": True, "count": len(user_mem), "message": "Memory already exists"}

+        if memory_data.session_id:
+            try:
+                session_obj = session_manager.get_session(memory_data.session_id)
+            except KeyError:
+                raise HTTPException(404, "Session not found")
+            _assert_session_owner(session_obj, user)
+
        new_entry = memory_manager.add_entry(text, memory_data.source, memory_data.category, owner=user)
        if memory_data.session_id:
            new_entry["session_id"] = memory_data.session_id
@@ -164,8 +171,17 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM

            session_id = memory.get("session_id")
            if session_id and session_id in session_manager.sessions:
-                session = session_manager.get_session(session_id)
-                memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                try:
+                    session = session_manager.get_session(session_id)
+                    if session:
+                        _assert_session_owner(session, user)
+                    memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
+                except KeyError:
+                    memory["session_name"] = "Unknown"
+                except HTTPException as exc:
+                    if exc.status_code != 404:
+                        raise
+                    memory["session_name"] = "Unknown"
            else:
                memory["session_name"] = "Unknown"

@@ -225,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        }
        messages = [system_msg] + sess.get_context_messages()

+        t_url, t_model, t_headers = resolve_task_endpoint(
+            sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
+        )
+
        try:
            suggestion_text = await llm_call_async(
-                sess.endpoint_url,
-                sess.model,
+                t_url,
+                t_model,
                messages,
                temperature=0.2,
                max_tokens=500,
-                headers=sess.headers,
+                headers=t_headers,
            )
            try:
                suggestions = json.loads(suggestion_text)
@@ -263,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
        endpoint_url = model = None
        headers = {}

-        # Try default model from settings first
-        settings = _load_settings()
-        ep_id = settings.get("default_endpoint_id", "")
-        default_model = settings.get("default_model", "")
-        if ep_id:
-            db = SessionLocal()
-            try:
-                ep = db.query(ModelEndpoint).filter(
-                    ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
-                ).first()
-                if ep:
-                    base = _normalize_base(ep.base_url)
-                    endpoint_url = build_chat_url(base)
-                    model = default_model
-                    if not model and ep.models:
-                        try:
-                            models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
-                            if models:
-                                model = models[0]
-                        except Exception:
-                            pass
-                    if ep.api_key:
-                        headers = {"Authorization": f"Bearer {ep.api_key}"}
-            finally:
-                db.close()
+        # Try utility model from settings first — memory audit is a background
+        # task and should prefer the lighter utility model over the main chat model.
+        from src.task_endpoint import resolve_task_endpoint
+        user = _owner(request)
+        t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
+        if t_url and t_model:
+            endpoint_url, model, headers = t_url, t_model, t_headers
+        else:
+            # Fall back to default model if no task/utility model configured
+            settings = _load_settings()
+            ep_id = settings.get("default_endpoint_id", "")
+            default_model = settings.get("default_model", "")
+            if ep_id:
+                db = SessionLocal()
+                try:
+                    ep = db.query(ModelEndpoint).filter(
+                        ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
+                    ).first()
+                    if ep:
+                        base = _normalize_base(ep.base_url)
+                        endpoint_url = build_chat_url(base)
+                        model = default_model
+                        if not model and ep.models:
+                            try:
+                                models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
+                                if models:
+                                    model = models[0]
+                            except Exception:
+                                pass
+                        if ep.api_key:
+                            headers = {"Authorization": f"Bearer {ep.api_key}"}
+                finally:
+                    db.close()

-        # Fall back to session model if no default configured
-        if not endpoint_url and session:
-            try:
-                sess = session_manager.get_session(session)
-                _assert_session_owner(sess, _owner(request))
-                endpoint_url = sess.endpoint_url
-                model = sess.model
-                headers = sess.headers
-            except KeyError:
-                pass
+            # Fall back to session model if no default configured
+            if not endpoint_url and session:
+                try:
+                    sess = session_manager.get_session(session)
+                    _assert_session_owner(sess, _owner(request))
+                    endpoint_url = sess.endpoint_url
+                    model = sess.model
+                    headers = sess.headers
+                except KeyError:
+                    pass

        if not endpoint_url or not model:
            raise HTTPException(400, "No default model configured — set one in Settings")
@@ -345,13 +373,14 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
            try:
                sess = session_manager.get_session(session)
                _assert_session_owner(sess, _owner(request))
-                endpoint_url = sess.endpoint_url
-                model = sess.model
-                headers = sess.headers
+                endpoint_url, model, headers = resolve_task_endpoint(
+                    sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
+                )
            except KeyError:
-                 raise HTTPException(404, "Session not found — needed for LLM config")
+                logger.warning("Session %s not found, falling back to utility endpoint", session)
+                endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
        else:
-            endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
+            endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
    
        if not endpoint_url or not model:
            raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
@@ -371,7 +400,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
                tmp.write(content)
                tmp_path = tmp.name
            try:
-                text = _process_pdf(tmp_path)
+                text = _process_pdf(tmp_path, owner=_owner(request))
            finally:
                os.unlink(tmp_path)
        else:
@@ -4,6 +4,7 @@ import os
 import re
 import uuid
 import json
+import hashlib
 import socket
 import time as _time
 import logging
@@ -25,7 +26,7 @@ from src.endpoint_resolver import (
    build_models_url,
    build_headers,
 )
-from src.auth_helpers import _auth_disabled, owner_filter
+from src.auth_helpers import _auth_disabled, effective_user, owner_filter

 logger = logging.getLogger(__name__)

@@ -122,6 +123,21 @@ def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
    return cleared_users


+def _default_endpoint_needs_assignment(current_default_id: str, enabled_endpoint_ids) -> bool:
+    """Return whether the global default chat endpoint should be (re)assigned.
+
+    True when `current_default_id` is empty (nothing configured yet) or is not
+    a member of `enabled_endpoint_ids`, any container supporting `in` (e.g. the
+    user disabled the old default). Without the second case, adding an endpoint
+    after disabling the previous default leaves `default_endpoint_id` pointing
+    at the disabled endpoint, so features that read the raw setting (Memory →
+    Tidy) fail with "No default model configured". See #3586.
+    """
+    if not current_default_id:
+        return True
+    return current_default_id not in enabled_endpoint_ids
+
+
 # Loopback hosts a user might type for a local model server (LM Studio,
 # llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
 # host the server actually runs on.
@@ -232,6 +248,9 @@ _PROVIDER_CURATED = {
    "zai-coding": [
        "glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
    ],
+    "kimi-code": [
+        "kimi-for-coding",
+    ],
    "deepseek": [
        "deepseek-chat", "deepseek-reasoner",
    ],
@@ -282,6 +301,7 @@ _HOST_TO_CURATED = (
    ("fireworks.ai", "fireworks"),
    ("googleapis.com", "google"),
    ("x.ai", "xai"),
+    ("nvidia.com", "nvidia"),
    ("openrouter.ai", "openrouter"),
    ("ollama.com", "ollama"),
 )
@@ -298,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
    parsed = urlparse(base_url)
    if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
        return "zai-coding"
+    if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
+        return "kimi-code"
    for domain, key in _HOST_TO_CURATED:
        if _host_match(base_url, domain):
            return key
@@ -476,10 +498,17 @@ _NON_CHAT_PREFIXES = (
    "dall-e", "tts-", "whisper", "text-embedding", "embedding",
    "davinci", "babbage", "moderation", "omni-moderation",
    "sora", "gpt-image", "chatgpt-image",
+    # embedding / retrieval / non-chat models (common across providers)
+    "snowflake/arctic-embed", "nvidia/nv-embed", "embed",
 )
 _NON_CHAT_CONTAINS = (
    "-realtime", "-transcribe", "-tts", "-codex",
-    "codex-",
+    "codex-", "content-safety", "-safety", "-reward", "nvclip",
+    "kosmos", "fuyu", "deplot", "vila", "neva",
+    "gliner", "riva", "-parse", "-embedqa", "-nemoretriever",
+    "topic-control", "calibration",
+    "ai-synthetic-video", "cosmos-reason2",
+    "bge", "llama-guard",
 )
 _NON_CHAT_EXACT_PREFIXES = (
    "gpt-audio",  # gpt-audio, gpt-audio-mini etc. (not gpt-4o-audio-preview which is chat)
@@ -502,9 +531,71 @@ def _is_chat_model(model_id: str) -> bool:
    return True


+def _delete_orphaned_provider_auth(db, auth_id: Optional[str], exclude_ep_id: Optional[str] = None) -> bool:
+    """Delete the ProviderAuthSession once no other endpoint references it; return True if a row was deleted."""
+    if not auth_id:
+        return False  # no auth session attached, nothing to clean up
+    from core.database import ProviderAuthSession
+    still_referenced = db.query(ModelEndpoint.id).filter(
+        ModelEndpoint.provider_auth_id == auth_id,
+        ModelEndpoint.id != exclude_ep_id,  # ignore the endpoint the caller asked to exclude
+    ).first()
+    if still_referenced is not None:
+        return False  # another endpoint still uses this auth session
+    auth_row = db.query(ProviderAuthSession).filter(ProviderAuthSession.id == auth_id).first()
+    if auth_row is None:
+        return False  # already gone
+    db.delete(auth_row)  # not committed here; committing is left to the caller
+    return True
+
+
+def _safe_detect_provider(base_url: str) -> str:
+    """Return `_detect_provider(base_url)`, or "" if detection raises, so endpoint probing never breaks."""
+    try:
+        return _detect_provider(base_url)
+    except Exception as exc:
+        logger.debug("Provider detection failed for %s: %s", base_url, exc)  # NOTE(review): logs raw base_url
+        return ""  # empty string stands for "provider unknown"
+
+
+def _safe_build_models_url(base_url: str) -> str:
+    """Return `build_models_url(base_url)`, or a plain `<base_url>/models` guess if the builder raises."""
+    try:
+        return build_models_url(base_url)
+    except Exception as exc:
+        logger.debug("Model URL detection failed for %s: %s", base_url, exc)  # NOTE(review): logs raw base_url
+        return f"{(base_url or '').rstrip('/')}/models"  # None is treated as ""; trailing slashes trimmed
+
+
+def _safe_build_headers(api_key: Optional[str], base_url: str) -> dict:
+    """Return `build_headers(api_key, base_url)`, or a generic Bearer header ({} without a key) if it raises."""
+    try:
+        return build_headers(api_key, base_url)
+    except Exception as exc:
+        logger.debug("Header detection failed for %s: %s", base_url, exc)  # NOTE(review): logs raw base_url
+        return {"Authorization": f"Bearer {api_key}"} if api_key else {}  # fresh dict, safe for callers to mutate
+
+
+def _is_discovery_only_provider(provider: str) -> bool:
+    return provider == "chatgpt-subscription"  # _probe_single_model skips its live completion request for these
+
+
+def _resolve_probe_key(ep) -> Optional[str]:
+    """Return the API key/bearer to probe endpoint `ep` with, or None (with a warning logged) if resolution fails."""
+    try:
+        from src.endpoint_resolver import resolve_endpoint_runtime  # inside the try: an import failure also yields None
+        _base, key = resolve_endpoint_runtime(ep, owner=getattr(ep, "owner", None))  # resolved base is discarded
+        return key
+    except Exception as exc:
+        logger.warning("Probe key resolution failed for %s: %s", getattr(ep, "id", "?"), exc)
+        return None
+
+
 def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 10, with_tools: bool = False) -> dict:
    """Send a realistic completion request to a single model. Returns {status, latency_ms, error?}."""
-    provider = _detect_provider(base)
+    provider = _safe_detect_provider(base)
+    if _is_discovery_only_provider(provider):
+        return {"status": "ok", "latency_ms": 0, "skipped": True}
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say OK"},
@@ -523,12 +614,12 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
    elif provider == "ollama":
        from src.llm_core import _build_ollama_payload
        target_url = build_chat_url(base)
-        h = build_headers(api_key, base)
+        h = _safe_build_headers(api_key, base)
        h["Content-Type"] = "application/json"
        payload = _build_ollama_payload(model_id, messages, 0.0, 5, stream=False, tools=_test_tools)
    else:
        target_url = build_chat_url(base)
-        h = build_headers(api_key, base)
+        h = _safe_build_headers(api_key, base)
        h["Content-Type"] = "application/json"
        from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
        _max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens"
@@ -617,10 +708,17 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    """Probe a base URL's /models endpoint and return list of model IDs.
    For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
    from src.endpoint_resolver import resolve_url
+    from src.llm_core import httpx_get_kimi_aware
    base = resolve_url(_normalize_base(base_url))
-    if _detect_provider(base) == "anthropic":
+    provider = _safe_detect_provider(base)
+    if provider == "chatgpt-subscription":
+        from src.chatgpt_subscription import fetch_available_models
+        if api_key:
+            return fetch_available_models(api_key, timeout=timeout)
+        return []
+    if provider == "anthropic":
        # Try Anthropic's /v1/models endpoint first
-        url = build_models_url(base)
+        url = _safe_build_models_url(base)
        headers = {"anthropic-version": "2023-06-01"}
        if api_key:
            headers["x-api-key"] = api_key
@@ -643,10 +741,10 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                return []
            logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}")
        return list(ANTHROPIC_MODELS)
-    url = build_models_url(base)
-    headers = build_headers(api_key, base)
+    url = _safe_build_models_url(base)
+    headers = _safe_build_headers(api_key, base)
    try:
-        r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
+        r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
        r.raise_for_status()
        data = r.json()
        # OpenAI format: {"data": [{"id": "model-name"}]}
@@ -662,7 +760,12 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                for _e in _PROVIDER_CURATED.get(_ck, []):
                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
                        models.append(_e)
-            return models
+            if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
+                _ck = _match_provider_curated(base, None)
+                for _e in _PROVIDER_CURATED.get(_ck, []):
+                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
+                        models.append(_e)
+            return [m for m in models if _is_chat_model(m)]
    except httpx.HTTPStatusError as e:
        if api_key:
            status = e.response.status_code if e.response is not None else "unknown"
@@ -686,7 +789,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
            data = r.json()
            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
            if models:
-                return models
+                return [m for m in models if _is_chat_model(m)]
    except Exception as e:
        logger.debug(f"Ollama /api/tags probe failed for {base}: {e}")
    # Fall back to curated list if the provider has a URL-based match (e.g. z.ai has no /models endpoint)
@@ -702,7 +805,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
    """Reachability probe that does not require installed/listed models."""
    from src.endpoint_resolver import resolve_url
    base = resolve_url(_normalize_base(base_url))
-    headers = build_headers(api_key, base)
+    headers = _safe_build_headers(api_key, base)

    # Ollama exposes /v1/models (OpenAI-compatible) AND native /api/version,
    # /api/tags. Probe native paths for Ollama-style endpoints, but avoid using
@@ -754,7 +857,22 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->

    try:
        r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
-        return _result_from_response(r)
+        result = _result_from_response(r)
+        if result["reachable"]:
+            return result
+        sc = result.get("status_code") or 0
+        if 400 <= sc < 500 and sc not in (401, 403):
+            models_url = _safe_build_models_url(base)
+            try:
+                r2 = httpx.get(models_url, headers=headers, timeout=timeout, verify=llm_verify())
+                result2 = _result_from_response(r2)
+                if result2["reachable"]:
+                    return result2
+            except Exception:
+                pass
+        if sc:
+            return result
+        last_error = result.get("error") or last_error
    except Exception as e:
        last_error = str(e)[:120]

@@ -763,15 +881,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->


 def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
-    """Return a provider-aware error message for failed endpoint probes."""
+    """Return a provider-aware error message for failed endpoint probes.
+
+    Surfaces the URL we actually probed and, when the endpoint looks like
+    LM Studio (port 1234 or hostname match), adds a hint about loading a
+    model and confirming the Developer Server is running. The user previously
+    saw a generic "No models found for that provider/key" with no way to
+    tell whether the URL was wrong, the server was down, or the server was
+    reachable but had no model loaded (issue #25).
+    """
    ping = ping or {}
    error = ping.get("error")
+    from src.endpoint_resolver import build_models_url
+    try:
+        probed = build_models_url(base_url) or base_url
+    except Exception:
+        probed = base_url
    parsed = urlparse(base_url)
    host = (parsed.hostname or "").lower()
    is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
+    is_lmstudio = (
+        parsed.port == 1234
+        or "lmstudio" in host
+        or "lm-studio" in host
+        or "lm_studio" in host
+    )
+
+    if is_lmstudio:
+        parts = [
+            "LM Studio is reachable, but no models were reported.",
+            f"Probed {probed}.",
+        ]
+        if error:
+            parts.append(f"Last probe error: {error}.")
+        parts.append(
+            "Open LM Studio, load at least one model, and confirm the "
+            "Developer Server is running on port 1234."
+        )
+        parts.append(
+            "Base URL should be http://localhost:1234/v1 (native) or "
+            "http://host.docker.internal:1234/v1 (Docker)."
+        )
+        return " ".join(parts)

    if is_ollama:
        parts = ["No Ollama models found for that endpoint."]
+        parts.append(f"Probed {probed}.")
        if error:
            parts.append(f"Last probe error: {error}.")
        parts.append("Check that Ollama is running and that the base URL is correct.")
@@ -781,9 +936,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
        return " ".join(parts)

    if error:
-        return f"No models found for that provider/key. Last probe error: {error}."
+        return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."

-    return "No models found for that provider/key."
+    return f"No models found for that provider/key. Probed {probed}."


 def _normalize_model_ids(value):
@@ -850,6 +1005,14 @@ def _visible_models(cached_models, hidden_models, pinned_models=None):
    return [m for m in merged if m not in hidden]


+def _api_key_fingerprint(api_key: Optional[str]) -> str:
+    """Return a stable label (first 8 hex chars of SHA-256) for telling same-URL credentials apart; "" if no key."""
+    key = (api_key or "").strip()  # None and whitespace-only keys count as "no key"
+    if not key:
+        return ""
+    return hashlib.sha256(key.encode("utf-8")).hexdigest()[:8]  # digest prefix only; the key itself is not returned
+
+
 def setup_model_routes(model_discovery):
    router = APIRouter(prefix="/api")

@@ -1028,7 +1191,7 @@ def setup_model_routes(model_discovery):

        for ep in endpoints:
            base = _normalize_base(ep.base_url)
-            provider = _detect_provider(base)
+            provider = _safe_detect_provider(base)
            # Merge cached + pinned models, then filter out hidden ones
            ep_model_type = getattr(ep, "model_type", None) or "llm"
            model_ids = _visible_models(
@@ -1092,20 +1255,24 @@ def setup_model_routes(model_discovery):
        # Require auth; "" is the unconfigured single-user mode, treated as
        # "see everything" by _fetch_models.
        try:
-            from src.auth_helpers import get_current_user as _gcu
-            owner = _gcu(request) or ""
-        except Exception:
-            owner = ""
-        # Reject anonymous in configured deployments — no leaking the model
-        # list to unauthenticated callers.
-        try:
+            if getattr(request.state, "api_token", False):
+                scopes = set(getattr(request.state, "api_token_scopes", []) or [])
+                if "chat" not in scopes:
+                    raise HTTPException(403, "API token is not scoped for chat")
+                if not getattr(request.state, "api_token_owner", None):
+                    raise HTTPException(403, "API token has no owner")
+            owner = effective_user(request) or ""
+
+            # Reject anonymous in configured deployments — no leaking the model
+            # list to unauthenticated callers.
            auth_mgr = getattr(request.app.state, "auth_manager", None)
            if not owner and not _auth_disabled() and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
                raise HTTPException(401, "Not authenticated")
        except HTTPException:
            raise
-        except Exception:
-            pass
+        except Exception as e:
+            logger.error("Auth gate error in GET /api/models, failing closed: %s", e)
+            raise HTTPException(status_code=500, detail="Internal error")
        # Admins see every endpoint (they manage the global pool); regular
        # users get the owner-scoped view.
        _is_admin = False
@@ -1169,7 +1336,14 @@ def setup_model_routes(model_discovery):
            t0 = _time.time()
            try:
                import asyncio as _asyncio
-                ping = await _asyncio.to_thread(_ping_endpoint, data["base"], data.get("api_key"), 1.5)
+                # Bumped 1.5s → 3.5s. The previous 1.5s budget was clipping
+                # local vLLM endpoints on Tailscale links where the model
+                # server is still loading (Qwen3.5-122B takes 2–3 min to
+                # warm); /v1/models can take 500–2500 ms on a busy box,
+                # which pushed _ping_endpoint's full path-discovery sweep
+                # past the cap and marked the row offline despite the
+                # user actively chatting with it.
+                ping = await _asyncio.to_thread(_ping_endpoint, data["base"], data.get("api_key"), 3.5)
                lat = round((_time.time() - t0) * 1000)
                return {
                    "alive": bool(ping.get("reachable")),
@@ -1207,7 +1381,7 @@ def setup_model_routes(model_discovery):
        results = []
        for ep in endpoints:
            base = _normalize_base(ep.base_url)
-            provider = _detect_provider(base)
+            provider = _safe_detect_provider(base)
            kind = _effective_endpoint_kind(ep, base)
            cached_count = len(_cached_model_ids(ep))
            entry = {
@@ -1386,10 +1560,35 @@ def setup_model_routes(model_discovery):
                # admin-pinned IDs that a probe would never surface.
                status = "online" if (all_models or pinned) else "offline"
                ping = None
+                # When cached_models is empty, do a quick reachability probe.
+                # Bumped 1.0s → 3.5s because the user reported endpoints they
+                # were ACTIVELY chatting with showed "offline" — the previous
+                # 1s timeout was clipping live cloud endpoints (DeepSeek can
+                # take 1.5–2.5s on /v1/models when their region is under load,
+                # vLLM on a remote GPU box behind SSH can also push past 1s).
+                # 3.5s still keeps the picker render snappy in the common
+                # "everything's already cached" path because this branch only
+                # runs for endpoints with an empty cached_models.
                if not all_models and not pinned and r.is_enabled:
-                    ping = _ping_endpoint(r.base_url, r.api_key, timeout=1.0)
+                    ping = _ping_endpoint(r.base_url, r.api_key, timeout=3.5)
                    if ping.get("reachable"):
                        status = "empty"
+                        # Best-effort: if the ping came back reachable, probe
+                        # the model list inline and persist it to cached_models
+                        # so this response (and later picker loads) can show
+                        # "online" instead of "empty". A failed probe is only
+                        # logged at debug level and the status stays "empty";
+                        # a later refresh can still fill cached_models in.
+                        try:
+                            probed = _probe_endpoint(r.base_url, r.api_key, timeout=5)
+                            if probed:
+                                r.cached_models = json.dumps(probed)
+                                db.commit()
+                                all_models = probed
+                                visible = _visible_models(all_models, r.hidden_models, pinned)
+                                status = "online"
+                        except Exception as _refill_err:
+                            logger.debug(f"opportunistic cached_models refill failed for {r.id}: {_refill_err!r}")
                base = _normalize_base(r.base_url)
                kind = _effective_endpoint_kind(r, base)
                results.append({
@@ -1397,6 +1596,7 @@ def setup_model_routes(model_discovery):
                    "name": r.name,
                    "base_url": r.base_url,
                    "has_key": bool(r.api_key),
+                    "api_key_fingerprint": _api_key_fingerprint(r.api_key),
                    "is_enabled": r.is_enabled,
                    "models": visible,
                    "pinned_models": pinned,
@@ -1469,15 +1669,27 @@ def setup_model_routes(model_discovery):
        # re-adding manually-added endpoints under their host:port name.
        from src.auth_helpers import get_current_user as _gcu_dedup
        _caller = _gcu_dedup(request) or None
+        _incoming_api_key = api_key.strip()
        _db_dedup = SessionLocal()
        try:
-            existing = (
+            _same_url_rows = (
                _db_dedup.query(ModelEndpoint)
                .filter(ModelEndpoint.base_url == base_url)
                .filter((ModelEndpoint.owner.is_(None)) | (ModelEndpoint.owner == _caller))
                .order_by(ModelEndpoint.owner.desc())  # prefer owned over shared
-                .first()
+                .all()
            )
+            existing = None
+            _empty_key_existing = None
+            for _candidate in _same_url_rows:
+                _candidate_key = (getattr(_candidate, "api_key", None) or "").strip()
+                if _candidate_key == _incoming_api_key:
+                    existing = _candidate
+                    break
+                if _incoming_api_key and not _candidate_key and _empty_key_existing is None:
+                    _empty_key_existing = _candidate
+            if existing is None and _incoming_api_key and _empty_key_existing is not None:
+                existing = _empty_key_existing
            if existing:
                changed = False
                # Persist any incoming pinned IDs onto the existing row. An
@@ -1526,6 +1738,8 @@ def setup_model_routes(model_discovery):
                    "id": existing.id,
                    "name": existing.name,
                    "base_url": existing.base_url,
+                    "has_key": bool(existing.api_key),
+                    "api_key_fingerprint": _api_key_fingerprint(existing.api_key),
                    "models": _visible_models(
                        existing_models,
                        getattr(existing, "hidden_models", None),
@@ -1579,12 +1793,19 @@ def setup_model_routes(model_discovery):
            )
            db.add(ep)
            db.commit()
-            # Auto-set as default chat endpoint if none configured yet. Seed
-            # the first CHAT model (not raw model_ids[0]) so we don't pin the
-            # global default to an embedding/tts/etc. entry a provider happens
-            # to list first.
+            # Auto-set as default chat endpoint when none is usable yet — either
+            # nothing is configured, or the configured default points at an
+            # endpoint that is now missing/disabled (#3586). Seed the first CHAT
+            # model (not raw model_ids[0]) so we don't pin the global default to
+            # an embedding/tts/etc. entry a provider happens to list first.
            settings = _load_settings()
-            if not settings.get("default_endpoint_id"):
+            enabled_ids = {
+                e.id
+                for e in db.query(ModelEndpoint).filter(
+                    ModelEndpoint.is_enabled == True  # noqa: E712
+                ).all()
+            }
+            if _default_endpoint_needs_assignment(settings.get("default_endpoint_id") or "", enabled_ids):
                from src.endpoint_resolver import _first_chat_model
                settings["default_endpoint_id"] = ep.id
                settings["default_model"] = _first_chat_model(model_ids) or ""
@@ -1599,6 +1820,8 @@ def setup_model_routes(model_discovery):
            "id": ep_id,
            "name": name.strip(),
            "base_url": base_url,
+            "has_key": bool(api_key.strip()),
+            "api_key_fingerprint": _api_key_fingerprint(api_key),
            "models": _merge_model_ids(model_ids, _pinned),
            "pinned_models": _pinned,
            "online": bool(model_ids) or bool(_pinned) or bool(ping.get("reachable")),
@@ -2049,7 +2272,9 @@ def setup_model_routes(model_discovery):
            cleared_user_preferences = _clear_user_prefs_for_endpoint(ep_id)
            cleared_sessions = _clear_sessions_for_endpoint(db, ep.base_url)
            cleared_loaded_sessions = _clear_loaded_sessions_for_endpoint(ep.base_url)
+            auth_id = getattr(ep, "provider_auth_id", None)
            db.delete(ep)
+            cleared_provider_auth = _delete_orphaned_provider_auth(db, auth_id, exclude_ep_id=ep_id)
            db.commit()
            _invalidate_models_cache()
            _local_probe_cache["data"] = None
@@ -2059,6 +2284,7 @@ def setup_model_routes(model_discovery):
                "cleared_user_preferences": cleared_user_preferences,
                "cleared_sessions": cleared_sessions,
                "cleared_loaded_sessions": cleared_loaded_sessions,
+                "cleared_provider_auth": cleared_provider_auth,
            }
        finally:
            db.close()
@@ -10,7 +10,8 @@ from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel

 from core.database import SessionLocal, Note
-from src.auth_helpers import get_current_user
+from src.auth_helpers import require_user
+from src.constants import DATA_DIR
 from sqlalchemy.orm.attributes import flag_modified

 logger = logging.getLogger(__name__)
@@ -95,6 +96,32 @@ def _note_to_dict(note: Note) -> Dict[str, Any]:
    }


+def _reminder_text_from_note(note: Note) -> tuple[str, str]:
+    """Return the reminder ``(title, body)`` pair for a stored note row.
+
+    A blank title becomes "Note reminder". A non-empty checklist yields up to
+    8 pending item texts, or the item count when nothing is pending; any
+    other note yields its stripped content capped at 400 characters.
+    """
+    title = (note.title or "Note reminder").strip() or "Note reminder"
+    try:
+        items = json.loads(note.items) if note.items else None
+    except (json.JSONDecodeError, TypeError):
+        items = None
+    # An empty checklist falls through to the content instead of "0 items".
+    if isinstance(items, list) and items:
+        open_items = [
+            i for i in items if isinstance(i, dict) and not (i.get("done") or i.get("checked"))
+        ]
+        pending = [t for t in (str(i.get("text") or "").strip() for i in open_items) if t]
+        if pending:
+            shown = "\n".join(f"- {text}" for text in pending[:8])
+            extra = f"\n...and {len(pending) - 8} more" if len(pending) > 8 else ""
+            return title, f"Pending ({len(pending)}):\n{shown}{extra}"
+        return title, f"{len(items)} item{'s' if len(items) != 1 else ''}"
+    return title, (note.content or "").strip()[:400]
+
+

 # ---------------------------------------------------------------------------
 # Reminder dispatch — module-level so background tasks (built-in actions)
@@ -114,8 +141,9 @@ async def dispatch_reminder(
    note_id: str,
    owner: str = "",
    queue_browser: bool = True,
+    settings_override: dict | None = None,
 ) -> dict:
-    """Fire a reminder via the configured channel (browser/email/ntfy).
+    """Fire a reminder via the configured channel (browser/email/ntfy/webhook).

    Args:
        title: short headline shown to the user
@@ -129,7 +157,7 @@ async def dispatch_reminder(
    nothing is "sent" synchronously for it — the channel just routes there.
    """
    from src.settings import load_settings
-    settings = load_settings()
+    settings = {**load_settings(), **(settings_override or {})}
    channel = settings.get("reminder_channel", "browser")
    llm_on = bool(settings.get("reminder_llm_synthesis", False))
    title = (title or "").strip()
@@ -143,7 +171,7 @@ async def dispatch_reminder(
            from datetime import datetime as _dt, timezone as _tz, timedelta as _td
            from pathlib import Path as _P
            _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
-            cache_path = _P(f"data/note_pings_{_slug}.json")
+            cache_path = _P(DATA_DIR) / f"note_pings_{_slug}.json"
            if cache_path.exists():
                cache = _json.loads(cache_path.read_text(encoding="utf-8"))
            last = cache.get(cache_key)
@@ -160,13 +188,14 @@ async def dispatch_reminder(
                # Treat those as browser-only dedupe so email reminders can be
                # retried by the backend scanner after a failed frontend path.
                should_skip = last_dt >= _dt.now(_tz.utc) - _td(minutes=25)
-                if should_skip and channel in ("email", "ntfy"):
+                if should_skip and channel in ("email", "ntfy", "webhook"):
                    should_skip = last_channel == channel
                if should_skip:
                    return {
                        "synthesis": None,
                        "email_sent": False,
                        "ntfy_sent": False,
+                        "webhook_sent": False,
                        "browser_sent": True,
                        "skipped": True,
                    }
@@ -179,14 +208,17 @@ async def dispatch_reminder(
        try:
            from src.endpoint_resolver import resolve_endpoint
            from src.llm_core import llm_call_async
-            url, model, headers = resolve_endpoint("utility")
+            from src.reminder_personas import synthesis_system_prompt
+            url, model, headers = resolve_endpoint("utility", owner=owner or None)
            if not url:
-                url, model, headers = resolve_endpoint("default")
+                url, model, headers = resolve_endpoint("default", owner=owner or None)
            if url and model:
+                persona_id = (settings.get("reminder_llm_persona") or "").strip()
+                sys_prompt = synthesis_system_prompt(persona_id)
                raw = await llm_call_async(
                    url=url, model=model,
                    messages=[
-                        {"role": "system", "content": "You are a reminder assistant. Write a single short, warm, motivating sentence (max 25 words) reminding the user about the note below. Do not add greetings, preamble, or hashtags. Output only the sentence."},
+                        {"role": "system", "content": sys_prompt},
                        {"role": "user", "content": f"Title: {title}\n\n{note_body}".strip()},
                    ],
                    temperature=0.7, max_tokens=200, headers=headers, timeout=30,
@@ -360,6 +392,76 @@ async def dispatch_reminder(
            email_error = str(e) or e.__class__.__name__
            logger.warning(f"Reminder email send failed: {e}")

+    webhook_sent = False
+    webhook_error = ""
+    if channel == "webhook":
+        try:
+            import httpx
+            import json as _wjson
+            from src.integrations import load_integrations
+            # Built-in payload defaults for known presets so users don't have
+            # to configure a template just to use a standard service.
+            _PRESET_TEMPLATE_DEFAULTS = {
+                "discord_webhook": '{"embeds": [{"title": "{{title}}", "description": "{{message}}", "color": 5793266}]}',
+            }
+            intg_id = settings.get("reminder_webhook_integration_id", "").strip()
+            template = settings.get("reminder_webhook_payload_template", "").strip()
+            if not intg_id:
+                webhook_error = "No webhook integration selected"
+            else:
+                intg = next(
+                    (i for i in load_integrations()
+                     if i.get("id") == intg_id and i.get("base_url")),
+                    None,
+                )
+                if not intg:
+                    webhook_error = f"Integration {intg_id!r} not found or missing base URL"
+                else:
+                    # Fall back to a built-in default for known presets so
+                    # users don't have to configure a template for standard
+                    # services like Discord.
+                    if not template:
+                        template = _PRESET_TEMPLATE_DEFAULTS.get(intg.get("preset", ""), "")
+                    if not template:
+                        webhook_error = "No payload template configured"
+                    else:
+                        # Render template: JSON-escape the values so the result
+                        # is always valid JSON regardless of special characters.
+                        # dumps() returns `"value"` — strip outer quotes.
+                        msg = (synthesis or note_body or title or "Reminder")[:4000]
+                        _t = _wjson.dumps(title or "Reminder")[1:-1]
+                        _m = _wjson.dumps(msg)[1:-1]
+                        rendered = template.replace("{{title}}", _t).replace("{{message}}", _m)
+                        hdrs = {"Content-Type": "application/json"}
+                        api_key = intg.get("api_key", "")
+                        auth_type = (intg.get("auth_type") or "none").lower()
+                        if api_key:
+                            if auth_type == "bearer":
+                                hdrs["Authorization"] = f"Bearer {api_key}"
+                            elif auth_type == "header":
+                                hdrs[intg.get("auth_header") or "Authorization"] = api_key
+                        url = intg["base_url"].rstrip("/")
+                        # SSRF guard — matches the pattern used by webhook_routes,
+                        # CalDAV, search, and embeddings. Blocks link-local / metadata
+                        # addresses (169.254.x.x) by default; set
+                        # REMINDER_WEBHOOK_BLOCK_PRIVATE_IPS=true to also block
+                        # RFC-1918 ranges for locked-down deployments.
+                        import os as _os
+                        from src.url_safety import check_outbound_url as _chk
+                        _block = _os.getenv("REMINDER_WEBHOOK_BLOCK_PRIVATE_IPS", "false").lower() == "true"
+                        _ok, _reason = _chk(url, block_private=_block)
+                        if not _ok:
+                            webhook_error = f"Webhook URL rejected: {_reason}"
+                        else:
+                            async with httpx.AsyncClient(timeout=10.0) as client:
+                                resp = await client.post(url, content=rendered.encode(), headers=hdrs)
+                                webhook_sent = resp.is_success
+                                if not webhook_sent:
+                                    webhook_error = f"Webhook returned HTTP {resp.status_code}"
+        except Exception as e:
+            webhook_error = str(e) or e.__class__.__name__
+            logger.warning(f"Reminder webhook send failed: {e}")
+
    ntfy_sent = False
    ntfy_error = ""
    if channel == "ntfy":
@@ -415,7 +517,7 @@ async def dispatch_reminder(
    # second send for the same note within 25 min. Without this, a note
    # whose due_date fires while the user has the app open got TWO emails
    # (frontend-fired here + background-fired by ping_notes 0–5 min later).
-    if (email_sent or ntfy_sent or browser_sent or local_browser_sent) and note_id:
+    if (email_sent or ntfy_sent or webhook_sent or browser_sent or local_browser_sent) and note_id:
        try:
            import json as _json
            from datetime import datetime as _dt, timezone as _tz
@@ -425,13 +527,13 @@ async def dispatch_reminder(
            _STATE = cache_path
            if _STATE is None:
                _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
-                _STATE = _P(f"data/note_pings_{_slug}.json")
+                _STATE = _P(DATA_DIR) / f"note_pings_{_slug}.json"
            _STATE.parent.mkdir(parents=True, exist_ok=True)
            try:
                _cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {})
            except Exception:
                _cache = {}
-            sent_channel = "email" if email_sent else "ntfy" if ntfy_sent else "browser"
+            sent_channel = "email" if email_sent else "ntfy" if ntfy_sent else "webhook" if webhook_sent else "browser"
            _cache[cache_key or str(note_id)] = {
                "at": _dt.now(_tz.utc).isoformat(),
                "channel": sent_channel,
@@ -441,11 +543,14 @@ async def dispatch_reminder(
            logger.debug(f"dispatch_reminder: cache write failed: {_e}")

    return {
+        "channel": channel,
        "synthesis": synthesis,
        "email_sent": email_sent,
        "email_error": email_error,
        "ntfy_sent": ntfy_sent,
        "ntfy_error": ntfy_error,
+        "webhook_sent": webhook_sent,
+        "webhook_error": webhook_error,
        "browser_sent": browser_sent or local_browser_sent,
    }

@@ -465,7 +570,33 @@ def setup_note_routes(task_scheduler=None):
    router = APIRouter(prefix="/api/notes", tags=["notes"])

    def _owner(request: Request) -> Optional[str]:
-        return get_current_user(request)
+        # require_user, not bare get_current_user: a request that reaches
+        # these owner-scoped routes with NO identity (auth-middleware
+        # regression, SSRF from a sibling service) must fail closed (401)
+        # when auth is configured, rather than being treated as single-user
+        # mode and handed blanket access to every account's notes. The
+        # documented anonymous modes (AUTH_ENABLED=false, LOCALHOST_BYPASS on
+        # loopback, unconfigured first-run) still resolve to None, the
+        # single-user path. fire_reminder below was already gated this way;
+        # the CRUD routes were not.
+        return require_user(request) or None
+
+    def _is_admin_or_single_user(request: Request, user: str | None) -> bool:
+        """Tell whether the caller may use the admin-only reminder features."""
+        # An empty user means require_user() already admitted the request, which
+        # only happens for auth-disabled, loopback-bypass, or unconfigured
+        # single-user modes; there is no separate non-admin boundary there.
+        if not user or user == "internal-tool":
+            return True
+        try:
+            from core.auth import AuthManager
+            manager = getattr(request.app.state, "auth_manager", None)
+            manager = manager or AuthManager()
+            configured = getattr(manager, "is_configured", True)
+            return True if not configured else bool(manager.is_admin(user))
+        except Exception:
+            # Fail closed: any lookup error denies admin access.
+            return False

    # --- LIST ---
    @router.get("")
@@ -683,21 +814,52 @@ def setup_note_routes(task_scheduler=None):
        Returns {synthesis, email_sent}.
        """
        # Gate against anonymous callers — LLM synthesis can burn tokens.
-        from src.auth_helpers import require_user as _ru
-        _ru(request)
+        user = require_user(request)
        body = await request.json()
-        note_id = body.get("note_id")
-        title = (body.get("title") or "").strip()
-        note_body = (body.get("body") or "").strip()
+        note_id = str(body.get("note_id") or "").strip()
        if not note_id:
            raise HTTPException(400, "note_id required")

-        # Delegate to the module-level helper so background tasks can reuse
-        # the same dispatch without an HTTP roundtrip + auth cookie.
+        caller = _owner(request)
+        is_test = note_id.startswith("test-")
+        is_admin = _is_admin_or_single_user(request, user or caller)
+        _override: dict = {}
+        if is_test:
+            if not is_admin:
+                raise HTTPException(403, "Admin only")
+            title = (body.get("title") or "Test Reminder").strip() or "Test Reminder"
+            note_body = (body.get("body") or "").strip()
+            # Optional overrides let the admin settings test button pass the
+            # current UI values directly so it never races a pending save.
+            if body.get("channel"):
+                _override["reminder_channel"] = body["channel"]
+            if body.get("webhook_integration_id"):
+                _override["reminder_webhook_integration_id"] = body["webhook_integration_id"]
+            if body.get("webhook_payload_template"):
+                _override["reminder_webhook_payload_template"] = body["webhook_payload_template"]
+            # Mirror the in-UI AI Synthesis toggle + persona so the test
+            # actually exercises the synthesis path before/without a Save.
+            if "llm_synthesis" in body:
+                _override["reminder_llm_synthesis"] = bool(body["llm_synthesis"])
+            if "llm_persona" in body:
+                _override["reminder_llm_persona"] = str(body["llm_persona"] or "")
+        else:
+            db = SessionLocal()
+            try:
+                note = db.query(Note).filter(Note.id == note_id).first()
+                if not note:
+                    raise HTTPException(404, "Note not found")
+                if caller is not None and note.owner != caller:
+                    raise HTTPException(404, "Note not found")
+                title, note_body = _reminder_text_from_note(note)
+            finally:
+                db.close()
+
        return await dispatch_reminder(
            title=title, note_body=note_body, note_id=note_id,
-            owner=_owner(request) or "",
+            owner=caller or "",
            queue_browser=False,
+            settings_override=_override or None,
        )

    # --- REORDER NOTES ---
@@ -6,16 +6,14 @@ import uuid
 from typing import List, Tuple
 from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
 from src.request_models import DirectoryRequest
-from core.constants import BASE_DIR, PERSONAL_DIR
+from core.constants import BASE_DIR, PERSONAL_DIR, PERSONAL_UPLOADS_DIR
 from src.rag_singleton import get_rag_manager
-from src.auth_helpers import get_current_user, require_user
+from src.auth_helpers import require_privilege, require_user
 from core.middleware import require_admin
 from src.upload_handler import secure_filename
+from src.upload_limits import PERSONAL_UPLOAD_MAX_BYTES

-UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
-MAX_PERSONAL_UPLOAD_BYTES = int(
-    os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
-)
+UPLOADS_DIR = PERSONAL_UPLOADS_DIR

 logger = logging.getLogger(__name__)

@@ -162,8 +160,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
            JSON response confirming removal
        """
        try:
-            if not directory:
-                raise HTTPException(400, "Directory path is required")
+            # Confine to PERSONAL_DIR — parity with add_directory_to_rag (which
+            # resolves the path the same way). Without this, an arbitrary or
+            # `..`-escaping path is passed straight to
+            # personal_docs_manager.remove_directory / rag.remove_directory.
+            directory = _resolve_allowed_personal_dir(directory)

            logger.info(f"Removing directory from RAG: {directory}")

@@ -194,7 +195,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
    @router.post("/upload")
    async def upload_files_to_rag(request: Request, files: List[UploadFile] = File(...)):
        """Upload files directly into RAG. Supports text and PDF."""
-        user = get_current_user(request)
+        user = require_privilege(request, "can_use_documents")
        rag = _rag()
        if not rag:
            raise HTTPException(503, "RAG system is not available — is the embedding service running?")
@@ -208,8 +209,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
        for upload in files:
            try:
                file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename)
-                content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1)
-                if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES:
+                content_bytes = await upload.read(PERSONAL_UPLOAD_MAX_BYTES + 1)
+                if len(content_bytes) > PERSONAL_UPLOAD_MAX_BYTES:
                    logger.warning(f"Rejected oversized personal upload: {upload.filename!r}")
                    total_failed += 1
                    continue
@@ -277,8 +278,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
            # Delete file from disk if it's in uploads dir
            deleted_from_disk = False
            try:
-                abs_target = os.path.abspath(filepath)
-                base_abs = os.path.abspath(UPLOADS_DIR)
+                abs_target = os.path.realpath(filepath)
+                base_abs = os.path.realpath(UPLOADS_DIR)
                in_uploads = (
                    abs_target == base_abs
                    or os.path.commonpath([abs_target, base_abs]) == base_abs
@@ -286,9 +287,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
            except ValueError:
                # commonpath raises on mixed drives / non-comparable paths
                in_uploads = False
-            if in_uploads and abs_target != base_abs and os.path.exists(abs_target):
-                os.remove(abs_target)
-                deleted_from_disk = True
+            if in_uploads and abs_target != base_abs:
+                try:
+                    os.remove(abs_target)
+                    deleted_from_disk = True
+                except FileNotFoundError:
+                    pass  # already gone — race with another request or cleanup

            # Exclude the file from the listing (persists across restarts)
            personal_docs_manager.exclude_file(filepath)
@@ -4,8 +4,9 @@ import os
 from typing import Optional
 from fastapi import APIRouter, Request
 from src.auth_helpers import get_current_user
+from src.constants import USER_PREFS_FILE

-PREFS_FILE = os.path.join("data", "user_prefs.json")
+PREFS_FILE = USER_PREFS_FILE


 def _load():
@@ -9,6 +9,7 @@ from pydantic import BaseModel, Field

 from src.request_models import PresetUpdateRequest
 from core.middleware import require_admin
+from src.auth_helpers import effective_user

 logger = logging.getLogger(__name__)

@@ -100,7 +101,8 @@ def setup_preset_routes(preset_manager) -> APIRouter:

        try:
            model_spec = data.get("model") or ""
-            url, model, headers = _resolve_model(model_spec)
+            user = effective_user(request)
+            url, model, headers = _resolve_model(model_spec, owner=user)
            result = await llm_call_async(url, model, messages, temperature=0.8, max_tokens=500, headers=headers)
            return {"success": True, "prompt": result.strip()}
        except Exception as e:
@@ -14,6 +14,7 @@ from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from src.endpoint_resolver import resolve_endpoint
 from src.auth_helpers import _auth_disabled, get_current_user
+from src.constants import DEEP_RESEARCH_DIR

 _SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")

@@ -37,13 +38,15 @@ def _first_chat_model(models) -> str:
    return (models[0] if models else "")


-def _resolve_research_endpoint(sess) -> tuple:
+def _resolve_research_endpoint(sess, owner: Optional[str] = None) -> tuple:
    """Return (endpoint_url, model, headers) for Deep Research, checking admin overrides."""
+    owner = owner or getattr(sess, "owner", None) or None
    url, model, headers = resolve_endpoint(
        "research",
        fallback_url=sess.endpoint_url,
        fallback_model=sess.model,
        fallback_headers=sess.headers,
+        owner=owner,
    )
    return url, model, headers

@@ -72,6 +75,38 @@ def _owned_enabled_endpoint(db, owner, endpoint_id=None):
    return owner_filter(q, ModelEndpoint, owner).first()


+def _resolve_endpoint_runtime(ep, owner=None, model: Optional[str] = None):
+    """Turn a ModelEndpoint row into ``(chat_url, model, headers)``, or ``None``.
+
+    Mirrors endpoint_resolver.resolve_endpoint's provider-auth handling for
+    panel-selected research endpoints. ChatGPT Subscription endpoints keep
+    OAuth tokens in ProviderAuthSession, so ep.api_key is intentionally empty.
+
+    ``None`` is returned when the credentials cannot be resolved, or when no
+    model is given and none can be picked from ``ep.cached_models``.
+    """
+    from src.endpoint_resolver import (
+        build_chat_url,
+        build_headers,
+        resolve_endpoint_runtime as resolve_model_endpoint_runtime,
+    )
+
+    try:
+        base, api_key = resolve_model_endpoint_runtime(ep, owner=owner)
+    except Exception as e:
+        logger.warning("Could not resolve endpoint credentials for research: %s", e)
+        return None
+
+    chosen = (model or "").strip()
+    if not chosen:
+        try:
+            cached = json.loads(ep.cached_models) if ep.cached_models else []
+            chosen = _first_chat_model(cached) if cached else chosen
+        except Exception:
+            pass  # best effort: an unreadable cache just leaves no model
+    return (build_chat_url(base), chosen, build_headers(api_key, base)) if chosen else None
+
+
 def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    router = APIRouter(tags=["research"])

@@ -98,7 +133,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        if entry is not None:
            return entry.get("owner", "") == user
        # Task no longer in memory — check the persisted JSON.
-        path = Path("data/deep_research") / f"{session_id}.json"
+        path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
        if not path.exists():
            return False
        try:
@@ -162,7 +197,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    def _assert_owns_research(session_id: str, user: str) -> None:
        """404-not-403 ownership gate for a research session's on-disk JSON.
        Use BEFORE returning any data or mutating the file."""
-        path = Path("data/deep_research") / f"{session_id}.json"
+        path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
        if not path.exists():
            raise HTTPException(404, "Research not found")
        try:
@@ -225,7 +260,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    ):
        user = _require_user(request)
        """List all completed research for the Library panel."""
-        data_dir = Path("data/deep_research")
+        data_dir = Path(DEEP_RESEARCH_DIR)
        items = []
        for p in data_dir.glob("*.json"):
            try:
@@ -275,7 +310,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        summary, stats — used by the Library preview panel."""
        user = _require_user(request)
        _validate_session_id(session_id)
-        path = Path("data/deep_research") / f"{session_id}.json"
+        path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
        if not path.exists():
            raise HTTPException(404, "Research not found")
        try:
@@ -292,7 +327,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Soft-archive / restore a research report (sets `archived` in its JSON)."""
        user = _require_user(request)
        _validate_session_id(session_id)
-        path = Path("data/deep_research") / f"{session_id}.json"
+        path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
        if not path.exists():
            raise HTTPException(404, "Research not found")
        try:
@@ -312,7 +347,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Delete a research result from disk."""
        user = _require_user(request)
        _validate_session_id(session_id)
-        data_dir = Path("data/deep_research")
+        data_dir = Path(DEEP_RESEARCH_DIR)
        json_path = data_dir / f"{session_id}.json"
        deleted = False
        if json_path.exists():
@@ -368,7 +403,6 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:

        if body.endpoint_id:
            from src.database import SessionLocal
-            from src.endpoint_resolver import normalize_base, build_chat_url, build_headers
            db = SessionLocal()
            try:
                # Owner-scoped: never resolve another user's private endpoint
@@ -377,35 +411,26 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
                ep = _owned_enabled_endpoint(db, user, body.endpoint_id)
                if not ep:
                    raise HTTPException(404, "Endpoint not found or disabled")
-                base = normalize_base(ep.base_url)
-                ep_url = build_chat_url(base)
-                ep_headers = build_headers(ep.api_key, base)
-                ep_model = body.model or ""
-                if not ep_model:
-                    try:
-                        import json as _json
-                        models = _json.loads(ep.cached_models) if ep.cached_models else []
-                        if models:
-                            ep_model = _first_chat_model(models)
-                    except Exception:
-                        pass
+                resolved = _resolve_endpoint_runtime(ep, owner=user, model=body.model)
+                if not resolved:
+                    raise HTTPException(400, "Endpoint is not configured with a usable model.")
+                ep_url, ep_model, ep_headers = resolved
            finally:
                db.close()
        else:
-            ep_url, ep_model, ep_headers = resolve_endpoint("research")
+            ep_url, ep_model, ep_headers = resolve_endpoint("research", owner=user)
            if not ep_url:
-                ep_url, ep_model, ep_headers = resolve_endpoint("utility")
+                ep_url, ep_model, ep_headers = resolve_endpoint("utility", owner=user)
            # When neither research nor utility is configured, use the user's
            # configured DEFAULT model (default_endpoint_id/default_model) rather
            # than arbitrarily grabbing the first enabled endpoint's first model
            # (which surfaced gpt-3.5). "Default" should mean the default model.
            if not ep_url:
-                ep_url, ep_model, ep_headers = resolve_endpoint("default")
+                ep_url, ep_model, ep_headers = resolve_endpoint("default", owner=user)
            if not ep_url:
-                ep_url, ep_model, ep_headers = resolve_endpoint("chat")
+                ep_url, ep_model, ep_headers = resolve_endpoint("chat", owner=user)
            if not ep_url:
                from src.database import SessionLocal
-                from src.endpoint_resolver import normalize_base, build_chat_url, build_headers
                db = SessionLocal()
                try:
                    # Owner-scoped first-enabled fallback: the caller's own rows
@@ -414,18 +439,9 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
                    # /api/v1/chat fallback (webhook_routes._first_enabled_endpoint).
                    ep = _owned_enabled_endpoint(db, user)
                    if ep:
-                        base = normalize_base(ep.base_url)
-                        ep_url = build_chat_url(base)
-                        ep_headers = build_headers(ep.api_key, base)
-                        ep_model = ""
-                        if ep.cached_models:
-                            try:
-                                import json as _json
-                                models = _json.loads(ep.cached_models)
-                                if models:
-                                    ep_model = _first_chat_model(models)
-                            except Exception:
-                                pass
+                        resolved = _resolve_endpoint_runtime(ep, owner=user)
+                        if resolved:
+                            ep_url, ep_model, ep_headers = resolved
                finally:
                    db.close()
            if not ep_url:
@@ -494,7 +510,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
            raise HTTPException(404, "No research found for this session")
        result = research_handler.get_result(session_id)
        if result is None:
-            p = Path("data/deep_research") / f"{session_id}.json"
+            p = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
            if p.exists():
                d = json.loads(p.read_text(encoding="utf-8"))
                return {
@@ -534,7 +550,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        sources = research_handler.get_sources(session_id) or []
        query = ""

-        path = Path("data/deep_research") / f"{session_id}.json"
+        path = Path(DEEP_RESEARCH_DIR) / f"{session_id}.json"
        if path.exists():
            try:
                disk = json.loads(path.read_text(encoding="utf-8"))
@@ -572,19 +588,18 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
                ep_headers = dict(r_headers)

        if not ep_url or not ep_model:
-            _merge(*resolve_endpoint("chat"))
+            _merge(*resolve_endpoint("chat", owner=user))
        if not ep_url or not ep_model:
-            _merge(*resolve_endpoint("research"))
+            _merge(*resolve_endpoint("research", owner=user))
        if not ep_url or not ep_model:
-            _merge(*resolve_endpoint("utility"))
+            _merge(*resolve_endpoint("utility", owner=user))
        if not ep_url or not ep_model:
-            # Last resort: any enabled endpoint
+            # Last resort: this user's enabled endpoint, plus legacy shared rows.
            from src.database import SessionLocal
-            from src.database import ModelEndpoint
            from src.endpoint_resolver import normalize_base, build_chat_url, build_headers
            db = SessionLocal()
            try:
-                ep = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).first()
+                ep = _owned_enabled_endpoint(db, user)
                if ep:
                    base = normalize_base(ep.base_url)
                    fallback_url = build_chat_url(base)
@@ -594,7 +609,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
                        try:
                            models = json.loads(ep.cached_models)
                            if models:
-                                fallback_model = models[0]
+                                fallback_model = _first_chat_model(models)
                        except Exception:
                            pass
                    _merge(fallback_url, fallback_model, fallback_headers)
@@ -10,8 +10,9 @@ import logging
 from core.session_manager import SessionManager
 from core.models import ChatMessage
 from src.request_models import SessionResponse
-from core.database import Session as DbSession, SessionLocal, Document, GalleryImage
-from src.auth_helpers import get_current_user, effective_user
+from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
+from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
+from src.session_actions import is_session_recently_active


 def _sanitize_export_filename(name: str) -> str:
@@ -92,35 +93,30 @@ def _reject_compact_during_active_run(session_id: str) -> None:


 def _verify_session_owner(request: Request, session_id: str, session_manager=None):
-    """Verify the current user owns the session. Raises 404 if not.
+    """Verify the current user owns the session, honoring single-user modes.

-    Ownership is checked against the DB row when one exists (unchanged). If
-    there is no DB row but the caller owns an in-memory "ghost" session — one
-    that lives only in ``session_manager`` because it was never persisted, or
-    its DB row was removed out-of-band — fall back to the in-memory owner so the
-    user can still manage and delete it. Without this fallback such sessions are
-    listed by ``/api/sessions`` (they come from the in-memory manager) yet every
-    per-session operation 404s, making them impossible to delete (issue #1044).
-
-    ``session_manager`` is optional and defaults to ``None`` so existing callers
-    that only care about persisted sessions keep their exact prior behavior.
+    Authenticated requests must match the stored DB or in-memory owner. When
+    auth is disabled and no user is present, treat the app as single-user mode:
+    verify that the session exists, but do not compare its stored owner. This
+    keeps QA/dev instances with AUTH_ENABLED=false from rejecting owner-stamped
+    rows created while auth was previously enabled.
    """
    user = effective_user(request)
-    if not user:
-        raise HTTPException(403, "Authentication required")
+    if not user and not _auth_disabled():
+        raise HTTPException(401, "Authentication required")
    db = SessionLocal()
    try:
        row = db.query(DbSession.owner).filter(DbSession.id == session_id).first()
    finally:
        db.close()
    if row is not None:
-        if row.owner != user:
+        if user and row.owner != user:
            raise HTTPException(404, f"Session {session_id} not found")
        return
    # No DB row — allow the caller to act on an in-memory ghost they own.
    if session_manager is not None:
        ghost = getattr(session_manager, "sessions", {}).get(session_id)
-        if ghost is not None and getattr(ghost, "owner", None) == user:
+        if ghost is not None and (not user or getattr(ghost, "owner", None) == user):
            return
    raise HTTPException(404, f"Session {session_id} not found")

@@ -262,7 +258,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            last_msg_map = {}
            mode_map = {}
            msg_count_map = {}
-            rows = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False).all()
+            q = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False)
+            q = owner_filter(q, DbSession, user)
+            rows = q.all()
            for row in rows:
                folder_map[row.id] = row.folder
                token_map[row.id] = (row.total_input_tokens or 0) + (row.total_output_tokens or 0)
@@ -281,15 +279,19 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            # Sessions with active documents that have content
            from sqlalchemy import func
            doc_session_ids = set(
-                r[0] for r in db.query(Document.session_id)
-                .filter(Document.is_active == True,
-                        Document.current_content != None,
-                        func.trim(Document.current_content) != "")
+                r[0] for r in owner_filter(
+                    db.query(Document.session_id)
+                    .filter(Document.is_active == True,
+                            Document.current_content != None,
+                            func.trim(Document.current_content) != ""),
+                    Document, user)
                .distinct().all()
            )
            img_session_ids = set(
-                r[0] for r in db.query(GalleryImage.session_id)
-                .filter(GalleryImage.session_id != None)
+                r[0] for r in owner_filter(
+                    db.query(GalleryImage.session_id)
+                    .filter(GalleryImage.session_id != None),
+                    GalleryImage, user)
                .distinct().all()
            )
        finally:
@@ -370,8 +372,13 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            pass
        elif not model_to_use:
            from src.llm_core import list_model_ids
-            ids = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
-                                 headers=validation_headers)
+            ids = list_model_ids(
+                endpoint_url,
+                timeout=REQUEST_TIMEOUT,
+                headers=validation_headers,
+                owner=user,
+                endpoint_id=endpoint_id.strip() if endpoint_id else None,
+            )
            if not ids:
                raise HTTPException(400, "Cannot reach /v1/models")
            # Default to the first CHAT model — endpoints often list embedding/
@@ -385,8 +392,13 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            from src.llm_core import list_model_ids
            import os as _os
            req_base = _os.path.basename(model_to_use.rstrip("/"))
-            avail = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
-                                   headers=validation_headers)
+            avail = list_model_ids(
+                endpoint_url,
+                timeout=REQUEST_TIMEOUT,
+                headers=validation_headers,
+                owner=user,
+                endpoint_id=endpoint_id.strip() if endpoint_id else None,
+            )
            if not avail:
                raise HTTPException(400, "Cannot reach /v1/models")
            if model_to_use not in avail:
@@ -543,22 +555,25 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            ids = body.get("ids", [])
        except Exception:
            ids = []
+        deleted_count = 0
        for sid in ids:
            try:
                _verify_session_owner(request, sid, session_manager)
-                session_manager.delete_session(sid)
+                
+                # Enforce "starred" protection consistent with single-session delete
                db = SessionLocal()
                try:
-                    db.query(_CM).filter(_CM.session_id == sid).delete()
-                    db.query(DbSession).filter(DbSession.id == sid).delete()
-                    db.commit()
-                except Exception:
-                    db.rollback()
+                    db_sess = db.query(DbSession).filter(DbSession.id == sid).first()
+                    if db_sess and db_sess.is_important:
+                        continue
                finally:
                    db.close()
+
+                if session_manager.delete_session(sid):
+                    deleted_count += 1
            except Exception:
                pass
-        return {"deleted": len(ids)}
+        return {"deleted": deleted_count}

    @router.delete("/session/{sid}")
    def delete_session(request: Request, sid: str):
@@ -924,7 +939,8 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async

-        url, model, headers = resolve_endpoint("utility", owner=get_current_user(request))
+        owner = getattr(session, "owner", None) or effective_user(request)
+        url, model, headers = resolve_endpoint("utility", owner=owner)
        if not url or not model:
            url, model, headers = session.endpoint_url, session.model, session.headers
        if not url or not model:
@@ -1006,7 +1022,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        }
        _THROWAWAY_MAX_MESSAGES = 4  # only delete if <= this many messages
        try:
-            rows = db.query(DbSession).filter(DbSession.archived == False, DbSession.owner == user).all()
+            rows = db.query(DbSession).filter(DbSession.archived == False, DbSession.owner == user).limit(2000).all()
            folder_map = {r.id: r.folder for r in rows}
            # Precompute per-session message counts in TWO aggregate queries
            # instead of 1–3 queries PER session — with many chats the per-row
@@ -1017,6 +1033,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                db.query(DbMsg.session_id, _sa_func.count(DbMsg.id))
                .filter(DbMsg.role == "assistant").group_by(DbMsg.session_id).all()
            )
+            cleanup_now = utcnow_naive()
            for row in rows:
                # Never delete important sessions
                if getattr(row, 'is_important', False):
@@ -1029,6 +1046,8 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                    if hasattr(session_manager, 'delete_session'):
                        session_manager.delete_session(row.id)
                    continue
+                if is_session_recently_active(row, now=cleanup_now):
+                    continue
                msg_count = _counts.get(row.id, 0)
                should_delete = False
                if msg_count == 0:
@@ -1,6 +1,7 @@
 """Shell routes — user-facing command execution endpoint."""

 import asyncio
+import importlib
 import json
 import logging
 import os
@@ -13,6 +14,8 @@ import tempfile
 from collections import namedtuple
 from pathlib import Path
 from typing import Dict, Any
+from core.platform_compat import IS_APPLE_SILICON, which_tool
+from src.optional_deps import prepare_optional_dependency_import

 # POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
 # on Windows, so importing them unconditionally crashed app startup there
@@ -37,6 +40,7 @@ from core.platform_compat import (
    IS_WINDOWS,
    detached_popen_kwargs,
    find_bash,
+    git_bash_path,
 )


@@ -92,6 +96,7 @@ def _venv_activate_prefix(venv: str | None) -> str:
    act = venv if venv.endswith("/bin/activate") else venv.rstrip("/") + "/bin/activate"
    return f". {act} && "

+
 logger = logging.getLogger(__name__)

 PTY_SUPPORTED = pty is not None and fcntl is not None and hasattr(os, "setsid")
@@ -146,6 +151,11 @@ def _pip_dist_name(pkg: dict) -> str:
    return (pkg.get("name") or "").replace("_", "-")


+def _import_optional_dependency_for_status(name: str):
+    prepare_optional_dependency_import(name)
+    return importlib.import_module(name)
+
+
 def _package_installed_from_probe(name: str, probe: dict) -> bool:
    """Return whether an optional dependency is usable by Cookbook.

@@ -169,7 +179,10 @@ def _package_installed_from_probe(name: str, probe: dict) -> bool:
            and (dists.get("torch") or modules.get("torch", {}).get("real_module"))
        )
    if name == "hf_transfer":
-        return bool(dists.get("hf-transfer") or modules.get("hf_transfer", {}).get("real_module"))
+        return bool(
+            dists.get("hf-transfer")
+            or modules.get("hf_transfer", {}).get("real_module")
+        )
    return bool(dists.get(name) or modules.get(name, {}).get("real_module"))


@@ -194,8 +207,14 @@ def _package_status_note(name: str, probe: dict) -> str:
        if binaries.get("llama-server"):
            parts.append(f"native llama-server: {binaries['llama-server']}")
        if dists.get("llama-cpp-python"):
-            parts.append(f"python package: llama-cpp-python {dists['llama-cpp-python']}")
-        return "; ".join(parts) if parts else "No native llama-server or llama-cpp-python server package found."
+            parts.append(
+                f"python package: llama-cpp-python {dists['llama-cpp-python']}"
+            )
+        return (
+            "; ".join(parts)
+            if parts
+            else "No native llama-server or llama-cpp-python server package found."
+        )
    if name == "diffusers":
        if _package_installed_from_probe(name, probe):
            return f"diffusers {dists.get('diffusers', 'available')} with torch {dists.get('torch', 'available')}"
@@ -205,7 +224,9 @@ def _package_status_note(name: str, probe: dict) -> str:
    return ""


-def _package_pip_update_status(pkg: dict, probe: dict | None = None) -> PackageUpdateStatus:
+def _package_pip_update_status(
+    pkg: dict, probe: dict | None = None
+) -> PackageUpdateStatus:
    """Return whether the Dependencies UI should offer a generic pip update.

    "Installed" means Cookbook can use the dependency. It does not always mean
@@ -213,12 +234,28 @@ def _package_pip_update_status(pkg: dict, probe: dict | None = None) -> PackageU
    native llama-server can come from a package manager/source build, and a CLI
    may be on PATH without matching Python package metadata.
    """
+    if pkg.get("name") == "APFEL":
+        return PackageUpdateStatus(
+            False,
+            "",  # Note is empty because it does allow updates outside of pip.
+        )
+
    if pkg.get("kind") == "system" or not pkg.get("pip"):
-        return PackageUpdateStatus(False, "Update this system dependency outside Odysseus.")
+        return PackageUpdateStatus(
+            False, "Update this system dependency outside Odysseus."
+        )

    name = pkg.get("name")
-    binaries = probe.get("binaries") if isinstance(probe, dict) and isinstance(probe.get("binaries"), dict) else {}
-    dists = probe.get("dists") if isinstance(probe, dict) and isinstance(probe.get("dists"), dict) else {}
+    binaries = (
+        probe.get("binaries")
+        if isinstance(probe, dict) and isinstance(probe.get("binaries"), dict)
+        else {}
+    )
+    dists = (
+        probe.get("dists")
+        if isinstance(probe, dict) and isinstance(probe.get("dists"), dict)
+        else {}
+    )

    if name == "llama_cpp" and binaries.get("llama-server"):
        return PackageUpdateStatus(
@@ -231,7 +268,9 @@ def _package_pip_update_status(pkg: dict, probe: dict | None = None) -> PackageU
            "Using a vLLM CLI on PATH without Python package metadata; update it outside Odysseus.",
        )

-    return PackageUpdateStatus(True, "Update uses pip in the selected Python environment.")
+    return PackageUpdateStatus(
+        True, "Update uses pip in the selected Python environment."
+    )


 def _prepend_user_install_bins_to_path() -> None:
@@ -250,7 +289,9 @@ def _prepend_user_install_bins_to_path() -> None:
        candidates = []
    candidates.append(os.path.expanduser("~/.local/bin"))

-    parts = os.environ.get("PATH", "").split(os.pathsep) if os.environ.get("PATH") else []
+    parts = (
+        os.environ.get("PATH", "").split(os.pathsep) if os.environ.get("PATH") else []
+    )
    changed = False
    for path in reversed([p for p in candidates if p]):
        if path not in parts:
@@ -357,9 +398,11 @@ PTY_UNSUPPORTED_ERROR = "pty_unsupported"

 class ShellExecRequest(BaseModel):
    command: str
-    timeout: int | None = None  # optional override; 0 = no timeout (run until client disconnects)
-    use_pty: bool = False       # use pseudo-TTY (for progress bars)
-    use_tmux: bool = False      # run in tmux session (survives browser disconnect)
+    timeout: int | None = (
+        None  # optional override; 0 = no timeout (run until client disconnects)
+    )
+    use_pty: bool = False  # use pseudo-TTY (for progress bars)
+    use_tmux: bool = False  # run in tmux session (survives browser disconnect)


 async def _create_shell(command: str, **kwargs):
@@ -368,8 +411,16 @@ async def _create_shell(command: str, **kwargs):
    POSIX: /bin/sh via create_subprocess_shell (unchanged behaviour).
    Windows: prefer a real bash (Git Bash/WSL) so bash-syntax commands behave
    the same as on Linux; fall back to cmd.exe when no bash is installed.
+    PowerShell and cmd-prefixed commands are executed directly via cmd.exe /c
+    to avoid quoting and $env:VAR expansion errors under Git Bash.
    """
    if IS_WINDOWS:
+        # PowerShell commands (used by the frontend for Windows log-file polling
+        # and session management) must run directly — passing them through
+        # bash -c mangles $env:VAR syntax and breaks the command.
+        cmd_trim = command.strip()
+        if cmd_trim.startswith("powershell") or cmd_trim.startswith("cmd "):
+            return await asyncio.create_subprocess_shell(command, **kwargs)
        bash = find_bash()
        if bash:
            return await asyncio.create_subprocess_exec(bash, "-c", command, **kwargs)
@@ -386,9 +437,7 @@ async def _exec_shell(command: str, timeout: int = EXEC_TIMEOUT) -> Dict[str, An
            stderr=asyncio.subprocess.PIPE,
            cwd=str(Path.home()),
        )
-        stdout_b, stderr_b = await asyncio.wait_for(
-            proc.communicate(), timeout=timeout
-        )
+        stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=timeout)
        stdout = stdout_b.decode(errors="replace")[:MAX_OUTPUT]
        stderr = stderr_b.decode(errors="replace")[:MAX_OUTPUT]
        return {"stdout": stdout, "stderr": stderr, "exit_code": proc.returncode}
@@ -399,7 +448,11 @@ async def _exec_shell(command: str, timeout: int = EXEC_TIMEOUT) -> Dict[str, An
                await proc.wait()
            except ProcessLookupError:
                pass
-        return {"stdout": "", "stderr": f"Command timed out after {timeout}s", "exit_code": -1}
+        return {
+            "stdout": "",
+            "stderr": f"Command timed out after {timeout}s",
+            "exit_code": -1,
+        }
    except Exception as e:
        return {"stdout": "", "stderr": str(e), "exit_code": -1}

@@ -481,7 +534,7 @@ async def _generate_pty(cmd: str, timeout: int, request: Request):
                if idx == -1:
                    break
                line = buf[:idx].decode(errors="replace")
-                buf = buf[idx + sep_len:]
+                buf = buf[idx + sep_len :]
                if line:
                    yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"

@@ -503,7 +556,7 @@ async def _generate_pty(cmd: str, timeout: int, request: Request):
                if idx == -1:
                    break
                line = buf[:idx].decode(errors="replace")
-                buf = buf[idx + sep_len:]
+                buf = buf[idx + sep_len :]
                if line:
                    yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"
            if buf:
@@ -534,6 +587,7 @@ def _pty_read(fd: int) -> bytes | None:
    """Blocking read from PTY fd. Called via run_in_executor.
    Returns bytes on data, None on timeout (no data yet)."""
    import select
+
    r, _, _ = select.select([fd], [], [], 1.0)
    if r:
        try:
@@ -557,10 +611,10 @@ async def _generate_tmux(cmd: str, request: Request):
    script_path = TMUX_LOG_DIR / f"{session_id}.sh"
    script_path.write_text(
        f"#!/bin/bash\n"
-        f"ODYSSEUS_USER_SHELL=\"${{SHELL:-}}\"\n"
-        f"if [ -n \"$ODYSSEUS_USER_SHELL\" ] && [ -x \"$ODYSSEUS_USER_SHELL\" ]; then\n"
-        f"  ODYSSEUS_USER_PATH=\"$(\"$ODYSSEUS_USER_SHELL\" -ic 'printf \"__ODYSSEUS_PATH__%s\\n\" \"$PATH\"' 2>/dev/null | sed -n 's/^__ODYSSEUS_PATH__//p' | tail -n 1 || true)\"\n"
-        f"  if [ -n \"$ODYSSEUS_USER_PATH\" ]; then export PATH=\"$ODYSSEUS_USER_PATH:$PATH\"; fi\n"
+        f'ODYSSEUS_USER_SHELL="${{SHELL:-}}"\n'
+        f'if [ -n "$ODYSSEUS_USER_SHELL" ] && [ -x "$ODYSSEUS_USER_SHELL" ]; then\n'
+        f'  ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"\n'
+        f'  if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi\n'
        f"fi\n"
        f"{cmd} 2>&1 | tee '{log_path}'\n"
        f"EC=${{PIPESTATUS[0]}}\n"
@@ -570,7 +624,9 @@ async def _generate_tmux(cmd: str, request: Request):
        encoding="utf-8",
    )
    script_path.chmod(0o755)
-    logger.info("tmux wrapper script created: session=%s path=%s", session_id, script_path)
+    logger.info(
+        "tmux wrapper script created: session=%s path=%s", session_id, script_path
+    )

    tmux_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(script_path))}"

@@ -602,7 +658,9 @@ async def _generate_tmux(cmd: str, request: Request):
        # Read new lines from log
        try:
            if log_path.exists():
-                lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
+                lines = log_path.read_text(
+                    encoding="utf-8", errors="replace"
+                ).splitlines()
                new_lines = lines[lines_sent:]
                for line in new_lines:
                    if line.startswith(":::EXIT_CODE:::"):
@@ -630,7 +688,9 @@ async def _generate_tmux(cmd: str, request: Request):
            # Session ended — do one final read
            await asyncio.sleep(0.5)
            if log_path.exists():
-                lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
+                lines = log_path.read_text(
+                    encoding="utf-8", errors="replace"
+                ).splitlines()
                for line in lines[lines_sent:]:
                    if line.startswith(":::EXIT_CODE:::"):
                        try:
@@ -672,8 +732,8 @@ async def _generate_win_detached(cmd: str, request: Request):
    if bash:
        script_path = TMUX_LOG_DIR / f"{session_id}.sh"
        script_path.write_text(
-            f"{cmd} > {shlex.quote(str(log_path))} 2>&1\n"
-            f"echo $? > {shlex.quote(str(exit_path))}\n",
+            f"{cmd} > {shlex.quote(git_bash_path(log_path))} 2>&1\n"
+            f"echo $? > {shlex.quote(git_bash_path(exit_path))}\n",
            encoding="utf-8",
        )
        argv = [bash, str(script_path)]
@@ -711,7 +771,9 @@ async def _generate_win_detached(cmd: str, request: Request):
            return
        try:
            if log_path.exists():
-                lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
+                lines = log_path.read_text(
+                    encoding="utf-8", errors="replace"
+                ).splitlines()
                for line in lines[lines_sent:]:
                    yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"
                lines_sent = len(lines)
@@ -723,11 +785,18 @@ async def _generate_win_detached(cmd: str, request: Request):
            await asyncio.sleep(0.3)
            try:
                if log_path.exists():
-                    lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
+                    lines = log_path.read_text(
+                        encoding="utf-8", errors="replace"
+                    ).splitlines()
                    for line in lines[lines_sent:]:
                        yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"
                    lines_sent = len(lines)
-                exit_code = int((exit_path.read_text(encoding="utf-8", errors="replace").strip() or "0"))
+                exit_code = int(
+                    (
+                        exit_path.read_text(encoding="utf-8", errors="replace").strip()
+                        or "0"
+                    )
+                )
            except Exception:
                exit_code = 0
            break
@@ -753,7 +822,9 @@ def setup_shell_routes() -> APIRouter:
            return {"stdout": "", "stderr": "No command provided", "exit_code": 1}

        logger.info("User shell exec requested: length=%d", len(cmd))
-        result = await _exec_shell(cmd, timeout=EXEC_TIMEOUT)
+        result = await _exec_shell(
+            cmd, timeout=req.timeout if req.timeout is not None else EXEC_TIMEOUT
+        )
        return result

    @router.post("/api/shell/stream")
@@ -762,9 +833,11 @@ def setup_shell_routes() -> APIRouter:
        _require_admin(request)
        cmd = req.command.strip()
        if not cmd:
+
            async def empty():
                yield f"data: {json.dumps({'stream': 'stderr', 'data': 'No command provided'})}\n\n"
                yield f"data: {json.dumps({'exit_code': 1})}\n\n"
+
            return StreamingResponse(empty(), media_type="text/event-stream")

        timeout = req.timeout if req.timeout is not None else STREAM_TIMEOUT
@@ -781,7 +854,11 @@ def setup_shell_routes() -> APIRouter:
        if use_tmux:
            # tmux is POSIX-only; Windows uses a detached-process + logfile tail
            # that preserves the "survives disconnect" behaviour.
-            gen = _generate_win_detached(cmd, request) if IS_WINDOWS else _generate_tmux(cmd, request)
+            gen = (
+                _generate_win_detached(cmd, request)
+                if IS_WINDOWS
+                else _generate_tmux(cmd, request)
+            )
            return StreamingResponse(gen, media_type="text/event-stream")

        if use_pty and not IS_WINDOWS:
@@ -813,7 +890,12 @@ def setup_shell_routes() -> APIRouter:
                            chunk = await stream.read(4096)
                            if not chunk:
                                if buf:
-                                    await q.put((name, buf.decode(errors="replace").rstrip("\r\n")))
+                                    await q.put(
+                                        (
+                                            name,
+                                            buf.decode(errors="replace").rstrip("\r\n"),
+                                        )
+                                    )
                                break
                            buf += chunk
                            while True:
@@ -821,7 +903,7 @@ def setup_shell_routes() -> APIRouter:
                                if idx == -1:
                                    break
                                line = buf[:idx].decode(errors="replace")
-                                buf = buf[idx + sep_len:]
+                                buf = buf[idx + sep_len :]
                                if line:
                                    await q.put((name, line))
                    finally:
@@ -880,7 +962,12 @@ def setup_shell_routes() -> APIRouter:
        return StreamingResponse(generate(), media_type="text/event-stream")

    @router.get("/api/cookbook/packages")
-    async def list_packages(request: Request, host: str | None = None, ssh_port: str | None = None, venv: str | None = None):
+    async def list_packages(
+        request: Request,
+        host: str | None = None,
+        ssh_port: str | None = None,
+        venv: str | None = None,
+    ):
        """Check which optional packages are installed.

        Local-target packages are checked in-process. Remote-target packages
@@ -890,7 +977,12 @@ def setup_shell_routes() -> APIRouter:
        """
        _require_admin(request)
        _reject_cross_site(request)
-        import importlib, importlib.metadata as importlib_metadata, shlex, json as _json, site, sys
+        import importlib.metadata as importlib_metadata
+        import shlex
+        import json as _json
+        import site
+        import sys
+
        _prepend_user_install_bins_to_path()
        importlib.invalidate_caches()
        try:
@@ -905,26 +997,122 @@ def setup_shell_routes() -> APIRouter:
                raise HTTPException(400, "Invalid ssh_port")
        packages = [
            # ── System ── OS binaries, not pip packages
-            {"name": "tmux", "pip": "", "desc": "Required for Linux/Termux Cookbook background downloads and serves", "category": "System", "target": "remote", "kind": "system", "install_hint": "Run Cookbook server setup, or install tmux with apt/pacman/dnf/apk/zypper."},
-            {"name": "docker", "pip": "", "desc": "Required only for Docker-backed launch commands", "category": "System", "target": "remote", "kind": "system", "install_hint": "Install Docker on the selected server and allow this user to run docker."},
+            {
+                "name": "tmux",
+                "pip": "",
+                "desc": "Required for Linux/Termux Cookbook background downloads and serves",
+                "category": "System",
+                "target": "remote",
+                "kind": "system",
+                "install_hint": "Run Cookbook server setup, or install tmux with apt/pacman/dnf/apk/zypper.",
+            },
+            {
+                "name": "docker",
+                "pip": "",
+                "desc": "Required only for Docker-backed launch commands",
+                "category": "System",
+                "target": "remote",
+                "kind": "system",
+                "install_hint": "Install Docker on the selected server and allow this user to run docker.",
+            },
            # ── LLM ── installs on GPU servers for model serving/downloading
-            {"name": "hf_transfer", "pip": "hf_transfer", "desc": "Fast model downloads from HuggingFace", "category": "LLM", "target": "remote"},
-            {"name": "llama_cpp", "pip": "llama-cpp-python[server]", "desc": "Serve GGUF models via llama.cpp", "category": "LLM", "target": "remote"},
-            {"name": "sglang", "pip": "sglang[all]", "desc": "Serve HF safetensors models via SGLang", "category": "LLM", "target": "remote"},
-            {"name": "vllm", "pip": "vllm", "desc": "High-throughput LLM serving engine", "category": "LLM", "target": "remote"},
+            {
+                "name": "hf_transfer",
+                "pip": "hf_transfer",
+                "desc": "Fast model downloads from HuggingFace",
+                "category": "LLM",
+                "target": "remote",
+            },
+            {
+                "name": "llama_cpp",
+                "pip": "llama-cpp-python[server]",
+                "desc": "Serve GGUF models via llama.cpp",
+                "category": "LLM",
+                "target": "remote",
+            },
+            {
+                "name": "sglang",
+                "pip": "sglang[all]",
+                "desc": "Serve HF safetensors models via SGLang",
+                "category": "LLM",
+                "target": "remote",
+            },
+            {
+                "name": "vllm",
+                "pip": "vllm",
+                "desc": "High-throughput LLM serving engine",
+                "category": "LLM",
+                "target": "remote",
+            },
+            {
+                "name": "APFEL",
+                "pip": "",
+                "desc": "OpenAI-compatible API for Apple Foundational Models on Apple Silicon",
+                "category": "LLM",
+                "target": "local",
+                "kind": "system",
+                "install_cmd": "brew install apfel",
+                "update_cmd": "brew upgrade apfel",
+                "install_hint": "Requires a native Apple Silicon Mac with Apple Foundational Models support. Installable via Homebrew on supported Macs.",
+            },
            # ── Image ── editor + diffusion model serving
-            {"name": "diffusers", "pip": "diffusers[torch]", "desc": "Image generation pipelines (SD, Flux) with PyTorch", "category": "Image", "target": "remote"},
-            {"name": "rembg", "pip": "rembg[gpu]", "desc": "AI background removal for image editor", "category": "Image", "target": "local"},
-            {"name": "realesrgan", "pip": "realesrgan", "desc": "AI denoise + upscale (Real-ESRGAN). Used by editor's Denoise and Upscale tools.", "category": "Image", "target": "local"},
+            {
+                "name": "diffusers",
+                "pip": "diffusers[torch]",
+                "desc": "Image generation pipelines (SD, Flux) with PyTorch",
+                "category": "Image",
+                "target": "remote",
+            },
+            {
+                "name": "transformers",
+                "pip": "transformers",
+                "desc": "Hugging Face model components used by SD/Flux pipelines and image tools",
+                "category": "Image",
+                "target": "remote",
+            },
+            {
+                "name": "rembg",
+                "pip": "rembg[gpu]",
+                "desc": "AI background removal for image editor",
+                "category": "Image",
+                "target": "local",
+            },
+            {
+                "name": "realesrgan",
+                "pip": "realesrgan",
+                "desc": "AI denoise + upscale (Real-ESRGAN). Used by editor's Denoise and Upscale tools.",
+                "category": "Image",
+                "target": "local",
+            },
            # ── Tools ──
-            {"name": "playwright", "pip": "playwright", "desc": "Browser automation for web tools", "category": "Tools", "target": "local"},
+            {
+                "name": "playwright",
+                "pip": "playwright",
+                "desc": "Browser automation for web tools",
+                "category": "Tools",
+                "target": "local",
+            },
        ]
+
+        # Most packages should not be installed through external means, so install_cmd and update_cmd
+        # default to None, meaning the recommended way to install/update is the Cookbook server setup or pip. Only system packages should provide explicit install/update commands.
+        for pkg in packages:
+            pkg.setdefault("install_cmd", None)
+            pkg.setdefault("update_cmd", None)
        # Remote check: for remote-target packages, probe the selected server's
        # venv over SSH so a remote `pip install` actually reflects here.
        remote_status: dict = {}
        remote_details: dict = {}
-        remote_names = [p["name"] for p in packages if p.get("target") == "remote" and p.get("kind") != "system"]
-        remote_system_names = [p["name"] for p in packages if p.get("target") == "remote" and p.get("kind") == "system"]
+        remote_names = [
+            p["name"]
+            for p in packages
+            if p.get("target") == "remote" and p.get("kind") != "system"
+        ]
+        remote_system_names = [
+            p["name"]
+            for p in packages
+            if p.get("target") == "remote" and p.get("kind") == "system"
+        ]
        if host and remote_names:
            try:
                py = _package_probe_script(remote_names)
@@ -934,7 +1122,9 @@ def setup_shell_routes() -> APIRouter:
                inner = f"{src}python3 -c {shlex.quote(py)}"
                argv = _ssh_base_argv(host, ssh_port) + [inner]
                proc = await asyncio.create_subprocess_exec(
-                    *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+                    *argv,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
                )
                out, _err = await asyncio.wait_for(proc.communicate(), timeout=12)
                txt = out.decode("utf-8", errors="replace").strip()
@@ -958,11 +1148,15 @@ def setup_shell_routes() -> APIRouter:
                checks = []
                for name in remote_system_names:
                    qn = shlex.quote(name)
-                    checks.append(f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi")
+                    checks.append(
+                        f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi"
+                    )
                inner = " ; ".join(checks)
                argv = _ssh_base_argv(host, ssh_port) + [inner]
                proc = await asyncio.create_subprocess_exec(
-                    *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+                    *argv,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.PIPE,
                )
                out, _err = await asyncio.wait_for(proc.communicate(), timeout=12)
                txt = out.decode("utf-8", errors="replace").strip()
@@ -987,11 +1181,25 @@ def setup_shell_routes() -> APIRouter:
                    if note:
                        pkg["status_note"] = note
            elif pkg.get("kind") == "system":
-                pkg["installed"] = shutil.which(pkg["name"]) is not None
+                if pkg["name"] == "APFEL":
+                    pkg["applicable"] = IS_APPLE_SILICON
+                    pkg["installed"] = which_tool("apfel") is not None
+                    pkg["status_note"] = (
+                        "Available on Apple Silicon (arm64) devices; exposed through a local OpenAI-compatible API."
+                        if IS_APPLE_SILICON
+                        else "Requires a native Apple Silicon Mac with Apple Foundational Models support."
+                    )
+                else:
+                    pkg["installed"] = shutil.which(pkg["name"]) is not None
            elif pkg["name"] == "llama_cpp" and shutil.which("llama-server"):
                pkg["installed"] = True
-                pkg["status_note"] = f"native llama-server: {shutil.which('llama-server')}"
-                probe = {"binaries": {"llama-server": shutil.which("llama-server")}, "dists": {}}
+                pkg["status_note"] = (
+                    f"native llama-server: {shutil.which('llama-server')}"
+                )
+                probe = {
+                    "binaries": {"llama-server": shutil.which("llama-server")},
+                    "dists": {},
+                }
            elif pkg["name"] == "vllm":
                _vllm_cli = shutil.which("vllm")
                pkg["installed"] = _vllm_cli is not None
@@ -1007,13 +1215,19 @@ def setup_shell_routes() -> APIRouter:
                    pkg["status_note"] = _package_status_note("vllm", probe)
            else:
                try:
-                    importlib.import_module(pkg["name"])
+                    _import_optional_dependency_for_status(pkg["name"])
                    importlib_metadata.version(_pip_dist_name(pkg))
                    pkg["installed"] = True
                except ImportError:
                    pkg["installed"] = False
                except importlib_metadata.PackageNotFoundError:
                    pkg["installed"] = False
+                except Exception:
+                    # Installed but crashes on import — e.g. a CUDA build of
+                    # llama-cpp-python raising FileNotFoundError when the CUDA
+                    # toolkit dir is absent. One broken optional package must not
+                    # 500 the entire packages panel; report it as not usable.
+                    pkg["installed"] = False

            if pkg.get("installed"):
                update_status = _package_pip_update_status(pkg, probe)
@@ -1037,15 +1251,31 @@ def setup_shell_routes() -> APIRouter:
        """Install a package via pip. Admin only — pip install is effectively code exec."""
        _require_admin(request)
        import sys as _sys
+
        body = await request.json()
        pip_name = body.get("pip")
        if not pip_name:
            return {"ok": False, "error": "No package specified"}
        # Validate against known packages to prevent arbitrary pip install
        known = {
-            "rembg[gpu]", "hf_transfer", "llama-cpp-python[server]", "sglang[all]", "diffusers", "diffusers[torch]",
-            "TTS", "bark", "faster-whisper", "playwright", "realesrgan", "gfpgan",
-            "insightface", "onnxruntime-gpu", "onnxruntime", "hdbscan", "vllm",
+            "rembg[gpu]",
+            "hf_transfer",
+            "llama-cpp-python[server]",
+            "sglang[all]",
+            "diffusers",
+            "diffusers[torch]",
+            "transformers",
+            "TTS",
+            "bark",
+            "faster-whisper",
+            "playwright",
+            "realesrgan",
+            "gfpgan",
+            "insightface",
+            "onnxruntime-gpu",
+            "onnxruntime",
+            "hdbscan",
+            "vllm",
        }
        if pip_name not in known:
            return {"ok": False, "error": f"Unknown package: {pip_name}"}
@@ -1071,6 +1301,7 @@ def setup_shell_routes() -> APIRouter:
        """
        _require_admin(request)
        from routes.cookbook_helpers import _llama_cpp_rebuild_cmd
+
        body = await request.json()
        engine = str(body.get("engine") or "llamacpp").strip()
        if engine != "llamacpp":
@@ -1079,7 +1310,11 @@ def setup_shell_routes() -> APIRouter:
        ssh_port = body.get("ssh_port")
        cmd = _llama_cpp_rebuild_cmd()
        try:
-            argv = (_ssh_base_argv(host, ssh_port) + [cmd]) if host else ["bash", "-lc", cmd]
+            argv = (
+                (_ssh_base_argv(host, ssh_port) + [cmd])
+                if host
+                else ["bash", "-lc", cmd]
+            )
        except ValueError as e:
            raise HTTPException(400, str(e))
        try:
@@ -21,10 +21,44 @@ from src.auth_helpers import get_current_user
 logger = logging.getLogger(__name__)


-_DATA_URL_RE = re.compile(
-    r'^data:image/(?P<fmt>png|jpeg|jpg);base64,(?P<data>.+)$',
-    re.IGNORECASE | re.DOTALL,
-)
+_DATA_URL_RE = re.compile(r"^data:image/png;base64,(?P<data>.+)$", re.IGNORECASE | re.DOTALL)
+_ANY_IMAGE_DATA_URL_RE = re.compile(r"^data:image/[^;]+;base64,", re.IGNORECASE)
+_PNG_MAGIC = b"\x89PNG\r\n\x1a\n"
+_MAX_SIGNATURE_BYTES = 2 * 1024 * 1024
+_MAX_SIGNATURE_B64 = ((_MAX_SIGNATURE_BYTES + 2) // 3) * 4
+_MAX_SIGNATURE_DIMENSION = 4096
+
+
+def _normalize_signature_png(raw: str) -> str:
+    """Validate a PNG signature (data URL or bare base64) and return it re-encoded as plain base64."""
+    text = (raw or "").strip()
+    png_url = _DATA_URL_RE.match(text)
+    if png_url is None and _ANY_IMAGE_DATA_URL_RE.match(text):
+        # A data URL for some other image type: refuse it before decoding anything.
+        raise HTTPException(400, "Signature data must be a PNG image")
+    encoded = png_url.group("data") if png_url else text
+    if len(encoded) > _MAX_SIGNATURE_B64:
+        raise HTTPException(400, "Signature PNG is too large")
+    try:
+        decoded = base64.b64decode(encoded, validate=True)
+    except Exception:
+        raise HTTPException(400, "Signature data must be base64-encoded PNG bytes")
+    if not decoded:
+        raise HTTPException(400, "Signature PNG is empty")
+    if len(decoded) > _MAX_SIGNATURE_BYTES:
+        # The base64 length cap is rounded up, so re-check the decoded size.
+        raise HTTPException(400, "Signature PNG is too large")
+    if not decoded.startswith(_PNG_MAGIC):
+        raise HTTPException(400, "Signature data must be a PNG image")
+    return base64.b64encode(decoded).decode("ascii")
+
+
+def _signature_dimension(value: Optional[int]) -> Optional[int]:
+    """Return *value* if it is None or an int in 1.._MAX_SIGNATURE_DIMENSION; otherwise raise 400."""
+    # None means "dimension not supplied" and is passed through untouched.
+    if value is None or (isinstance(value, int) and 1 <= value <= _MAX_SIGNATURE_DIMENSION):
+        return value
+    raise HTTPException(400, "Signature dimensions are invalid")


 class SignatureCreate(BaseModel):
@@ -67,24 +101,18 @@ def setup_signature_routes() -> APIRouter:
    @router.post("/api/signatures")
    async def create_signature(request: Request, req: SignatureCreate) -> Dict[str, Any]:
        user = get_current_user(request)
-        raw = (req.data or "").strip()
-        m = _DATA_URL_RE.match(raw)
-        b64 = m.group("data") if m else raw
-        try:
-            payload = base64.b64decode(b64, validate=True)
-            if not payload:
-                raise ValueError("empty payload")
-        except Exception:
-            raise HTTPException(400, "Signature data must be base64-encoded PNG bytes")
+        b64 = _normalize_signature_png(req.data)
+        width = _signature_dimension(req.width)
+        height = _signature_dimension(req.height)

        sig = Signature(
            id=str(uuid.uuid4()),
            owner=user,
            name=(req.name or "Signature").strip() or "Signature",
            data_png=b64,
-            width=req.width,
-            height=req.height,
-            svg=req.svg,
+            width=width,
+            height=height,
+            svg=None,
        )
        db = SessionLocal()
        try:
@@ -11,6 +11,8 @@ import logging
 import re
 from typing import List, Optional

+import httpx
+
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel, Field

@@ -51,6 +53,10 @@ class SkillAddRequest(BaseModel):
    steps: List[str] = Field(default_factory=list)


+class SkillImportUrlRequest(BaseModel):
+    url: str = Field(..., min_length=8, max_length=2000)  # required; only the length is validated here
+
+
 class SkillUpdateRequest(BaseModel):
    name: Optional[str] = None
    description: Optional[str] = None
@@ -685,8 +691,12 @@ async def _run_skill_test_once(md: str, task: str, url, model, headers, owner) -
        {"role": "user", "content": task},
    ]
    try:
+        # max_tokens explicitly set: passing 0 lets some upstreams (Ollama,
+        # OpenAI-compat) generate an empty completion, which manifested as
+        # the skill test returning nothing while chat (which carries its
+        # preset's max_tokens) worked. 4096 matches the chat default.
        async for chunk in stream_agent_loop(url, model, messages, headers=headers,
-                                             temperature=0.3, max_tokens=0, max_rounds=8, owner=owner):
+                                             temperature=0.3, max_tokens=4096, max_rounds=8, owner=owner):
            if not chunk.startswith("data: ") or chunk.strip() == "data: [DONE]":
                continue
            try:
@@ -1014,7 +1024,7 @@ def _resolve_audit_models(owner=None):
            spec = (get_setting("teacher_model", "") or "").strip()
            if spec:
                from src.ai_interaction import _resolve_model
-                t_url, t_model, t_headers = _resolve_model(spec)
+                t_url, t_model, t_headers = _resolve_model(spec, owner=owner)
                if t_url and t_model:
                    teacher = (t_url, t_model, t_headers)
    except Exception as e:
@@ -1103,6 +1113,35 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
        idx = skills_manager.index_for(owner=user)
        return {"index": idx, "count": len(idx)}

+    @router.get("/slash-catalog")
+    async def get_slash_catalog(request: Request):
+        """List the skills the UI may offer as slash commands, sorted by name.
+
+        Built from the same index the agent prompt publishes, so the UI never
+        suggests a command the model would not normally be allowed to discover.
+        """
+        owner = _owner(request)
+        # Full records supply fields the index entry may omit (category, description, usage stats).
+        details_by_name = {rec.get("name"): rec for rec in skills_manager.load(owner=owner)}
+        rows = []
+        for item in skills_manager.index_for(owner=owner):
+            skill_name = (item.get("name") or "").strip()
+            if skill_name:
+                detail = details_by_name.get(skill_name) or {}
+                raw_category = item.get("category") or detail.get("category") or "general"
+                rows.append({
+                    "type": "skill",
+                    "token": f"/{skill_name}",
+                    "name": skill_name,
+                    "category": f"Skills / {raw_category.strip() or 'general'}",
+                    "help": item.get("description") or detail.get("description") or "",
+                    "usage": f"/{skill_name} <request>",
+                    "uses": int(detail.get("uses") or 0),
+                    "last_used": detail.get("last_used"),
+                })
+        catalog = sorted(rows, key=lambda row: row["name"])
+        return {"skills": catalog, "count": len(catalog)}
+
    @router.get("/builtin")
    async def list_builtin_skills(request: Request):
        """Read-only list of the agent's built-in tool capabilities (research,
@@ -1203,6 +1242,36 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
            save_settings(settings)
        return {"ok": True, "name": name, "is_overridden": False}

+    @router.post("/import-from-url")
+    async def import_skill_from_url(request: Request, body: SkillImportUrlRequest):
+        """Install a SKILL.md bundle from a public GitHub URL (skills.sh links supported).
+
+        Maps SkillImportError to 400, download errors to 502, and anything else to 500.
+        """
+        require_admin(request)
+        user = _owner(request)
+        import asyncio
+        from services.memory.skill_importer import SkillImportError, fetch_skill_bundle
+
+        source_url = body.url.strip()
+        try:
+            # fetch_skill_bundle is a blocking call; run it off the event loop.
+            files, _src = await asyncio.to_thread(fetch_skill_bundle, source_url)
+            entry = skills_manager.import_bundle_from_files(files, owner=user, source_url=source_url)
+        except SkillImportError as e:
+            raise HTTPException(400, str(e)) from e
+        except httpx.HTTPError as e:
+            logger.warning("skill import fetch failed: %s", e)
+            detail = str(e).strip() or "Could not download skill from URL"
+            raise HTTPException(502, detail) from e
+        except Exception as e:
+            # Unexpected failure: log the traceback so the opaque 500 stays diagnosable.
+            logger.exception("skill import failed")
+            raise HTTPException(500, "Skill import failed") from e
+
+        _fire_skill_added(user)
+        return {"ok": True, "skill": entry, "files": len(files)}
+
    @router.post("/add")
    async def add_skill(request: Request, body: SkillAddRequest):
        user = _owner(request)
@@ -1236,6 +1305,47 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
            _fire_skill_added(user)
        return {"ok": True, "deduped": bool(entry.get("_deduped")), "skill": entry}

+    @router.post("/{skill_id}/invoke")
+    async def invoke_skill(request: Request, skill_id: str):
+        """Build a skill-pinned prompt for slash-command invocation.
+
+        This is intentionally server-side so availability, ownership, and usage
+        accounting use the same rules as the SkillsManager.
+
+        The optional JSON body may carry a string ``request``; a missing or
+        unparsable body, or a non-string ``request``, means "no request text".
+        Raises 404 when the skill is not in the caller's index or has no source.
+        """
+        user = _owner(request)
+        try:
+            body = await request.json()
+        except Exception:
+            body = {}
+        raw_request = body.get("request") if isinstance(body, dict) else None
+        # Guard the type: calling .strip() on a number or list would surface as a 500.
+        request_text = raw_request.strip() if isinstance(raw_request, str) else ""
+
+        invokable = {
+            s.get("name"): s for s in skills_manager.index_for(owner=user)
+            if (s.get("name") or "").strip()
+        }
+        match = invokable.get(skill_id)
+        if not match:
+            raise HTTPException(404, "Skill is not available for slash invocation")
+
+        name = match.get("name")
+        md = skills_manager.read_skill_md(name, owner=user)
+        if md is None:
+            raise HTTPException(404, "Skill source unavailable")
+
+        skills_manager.record_use(name, owner=user)
+        message = (
+            "Apply the skill below to my request, following its Procedure / Pitfalls / Verification.\n\n"
+            f"--- BEGIN SKILL ---\n{md}\n--- END SKILL ---\n\n"
+            + (f"Request: {request_text}" if request_text else "Request: (use the skill as appropriate)")
+        )
+        return {"ok": True, "type": "skill", "name": name, "command": f"/{name}", "message": message}
+
    @router.get("/{skill_id}")
    async def get_skill(request: Request, skill_id: str):
        user = _owner(request)
--- a/Show More
+++ b/Show More