diff --git a/.dockerignore b/.dockerignore index ed30dd73b..aed7e9368 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,7 @@ __pycache__/ dist/ build/ .env +.env.bak.* /data/ /logs/ .git/ diff --git a/.env.example b/.env.example index d8c872bb0..e53d2f8f3 100644 --- a/.env.example +++ b/.env.example @@ -16,6 +16,10 @@ LLM_HOST=localhost # when started with OLLAMA_HOST=0.0.0.0:11434. # OLLAMA_BASE_URL=http://host.docker.internal:11434/v1 +# Optional LM Studio URL. In Docker, host LM Studio is reachable here +# when LM Studio is set to serve on all interfaces (0.0.0.0). +# LM_STUDIO_URL=http://host.docker.internal:1234 + # OpenAI API key (only needed if using OpenAI models). # Do not commit real keys. Keep this commented until needed. # OPENAI_API_KEY=your_openai_api_key_here @@ -59,6 +63,10 @@ SEARXNG_INSTANCE=http://localhost:8080 # Keep false for Docker, LAN, reverse proxy, and any shared deployment. # LOCALHOST_BYPASS=false +# Mark session cookies Secure. Set true when Odysseus is served through HTTPS +# by a trusted reverse proxy or private access gateway. +# SECURE_COOKIES=true + # Optional: pre-seed the first admin password during setup. # Do not commit a real password. # ODYSSEUS_ADMIN_PASSWORD=change_me_before_first_boot @@ -141,7 +149,8 @@ SEARXNG_INSTANCE=http://localhost:8080 # # AMD ROCm (requires ROCm drivers on the host and the GID of the render group): # COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml -# RENDER_GID=992 +# Find the render GID with: getent group render | cut -d: -f3 +# RENDER_GID=989 # # These overlays only expose the GPU devices. The slim Odysseus image # still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM, diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..ae95229ef --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,103 @@ +name: Bug Report +description: Report a reproducible bug in Odysseus. 
+labels: ["bug"] + +body: + - type: markdown + attributes: + value: | + **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) + and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first. + Duplicate reports slow things down. + + For security vulnerabilities, **do not open a public issue** — + use [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) + and read [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md) first. + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + options: + - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and did not find an existing report of this bug. + required: true + - label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).) + required: true + - label: I am running the latest code from `main`. + required: true + + - type: dropdown + id: install-method + attributes: + label: Install Method + options: + - Docker (docker compose up) + - Manual Python install (pip / venv) + - Windows native (launch-windows.ps1) + - macOS app (build-macos-app.sh / start-macos.sh) + - Other (describe in the reproduction steps below) + validations: + required: true + + - type: dropdown + id: os + attributes: + label: Operating System + options: + - Linux + - macOS + - Windows + - Other + validations: + required: true + + - type: textarea + id: steps + attributes: + label: Steps to Reproduce + description: Exact steps that reliably trigger the bug. The more specific, the faster this gets fixed. + placeholder: | + 1. Go to ... 
+ 2. Click / type ... + 3. Observe ... + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected Behaviour + description: What should have happened? + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual Behaviour + description: What actually happened? Include the full error message if there is one. + validations: + required: true + + - type: textarea + id: logs + attributes: + label: Logs / Screenshots + description: Paste relevant terminal output or attach screenshots. Remove API keys, passwords, and personal data before pasting. + render: text + + - type: input + id: model-backend + attributes: + label: Model / Backend (if relevant) + description: "e.g. Ollama + llama3.2:latest, vLLM + mistral-7b, OpenAI API, Anthropic API" + placeholder: "Ollama + llama3.2:latest" + + - type: textarea + id: additional-info + attributes: + label: Additional Information + description: Anything else that might help — browser console errors, related issues, things you already tried, or environment quirks. + placeholder: | + - Any other context goes here. + - If you are willing to submit a PR that fixes this, mention it here. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..da163954f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,13 @@ +blank_issues_enabled: false +contact_links: + - name: Question / Need Help + url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/q-a + about: Ask how-to questions, setup help, and model configuration questions here. Issues are for confirmed bugs and concrete proposals only. + + - name: Idea or Suggestion + url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas + about: Discuss ideas and gauge interest before opening a formal feature request. If there is already a discussion, link it in your feature request. 
+ + - name: Security Vulnerability + url: https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new + about: Report vulnerabilities privately via GitHub Security Advisories — never as a public issue. Read SECURITY.md before reporting. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 000000000..733114bbb --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,90 @@ +name: Feature Request +description: Propose a new feature or a concrete improvement to Odysseus. +labels: ["enhancement"] + +body: + - type: markdown + attributes: + value: | + **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) + and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first. + Feature requests that duplicate [ROADMAP.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/ROADMAP.md) + or an existing open issue will be closed as duplicates. + + If your idea needs community input before it becomes a concrete proposal, + start a [discussion](https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas) instead. + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + options: + - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and this has not already been proposed. + required: true + - label: I searched [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and this is not already being debated there. + required: true + - label: This is a concrete, actionable proposal — not a vague "it would be nice if..." request. + required: true + + - type: dropdown + id: area + attributes: + label: Area + description: Which part of the application does this affect? 
+ options: + - Chat / Agent + - Email + - Calendar + - Documents / RAG + - Memory + - Cookbook / Local Models / GPU + - Search + - Notes / Editor + - Auth / Security + - Docker / Deployment + - UI / Frontend + - API / Backend + - MCP + - Testing / CI + - Other + validations: + required: true + + - type: textarea + id: problem + attributes: + label: Problem or Motivation + description: What problem does this solve, or what use case does it enable? Be specific — "it would be better" is not enough. + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed Solution + description: Describe the behaviour or change you want to see. Include API shape, UI sketch, or code snippets if that helps make it concrete. + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: What other approaches did you consider and why did you rule them out? If there is an existing workaround, describe it. + + - type: textarea + id: prior-art + attributes: + label: Prior Art / Related Issues + description: Link any related issues, discussions, or external references that informed this proposal. + + - type: dropdown + id: willing_to_implement + attributes: + label: Are you willing to implement this? 
+ options: + - "Yes — I can open a PR" + - "Partially — I can help but need guidance" + - "No — I am only filing the request" + validations: + required: true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..8afee6d88 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,53 @@ +## Summary + + + +## Linked Issue + + + +Fixes # + +## Type of Change + +- [ ] Bug fix (non-breaking — fixes a confirmed issue) +- [ ] New feature (non-breaking — adds new behaviour) +- [ ] Breaking change (changes or removes existing behaviour) +- [ ] Refactor / cleanup (behaviour unchanged) +- [ ] Documentation only +- [ ] CI / tooling / configuration + +## Checklist + +- [ ] I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) and [open PRs](https://github.com/pewdiepie-archdaemon/odysseus/pulls) — this is not a duplicate. +- [ ] This PR targets `main` +- [ ] My changes are limited to the scope described above — no unrelated refactors or whitespace changes mixed in. +- [ ] I actually ran the app (`docker compose up` or `uvicorn app:app`) and verified the change works end-to-end. Type-checks and unit tests are not enough. + +## How to Test + + + +1. +2. +3. + +## Visual / UI changes — REQUIRED if you touched anything that renders + +**Anything that changes what the UI looks like — buttons, icons, padding, colors, fonts, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — needs all of the following. PRs that change rendering without these WILL be closed.** + +- [ ] **Screenshot or short clip** of the change in the running app, attached below. Mobile screenshot too if the change affects mobile. +- [ ] **Style match**: the change uses Odysseus's existing visual language. Specifically: + - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, etc.) — do not introduce new color values, font sizes, or spacing units. 
+ - Reuse existing button/input/card/border classes. Don't invent parallel styling. + - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text. + - Monospaced font (`Fira Code`) for primary UI text. Don't override. + - Dark theme is the default; any light-mode work must be wired through the existing theme system, not hard-coded. +- [ ] **No new component patterns.** If a similar widget already exists in the app, extend it instead of writing a parallel one. +- [ ] **I am not an LLM agent submitting a bulk PR.** If you are, please open an issue describing the problem first — bulk auto-generated PRs that don't match the project's visual style are closed on sight, even when the underlying fix is correct. + +### Screenshots / clips + + diff --git a/.gitignore b/.gitignore index cba02b209..c48f6cd61 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ venv/ # Environment .env +.env.bak.* !.env.example # Data — all user data stays local diff --git a/ACKNOWLEDGMENTS.md b/ACKNOWLEDGMENTS.md index c4079e6e5..fdf55c48a 100644 --- a/ACKNOWLEDGMENTS.md +++ b/ACKNOWLEDGMENTS.md @@ -33,8 +33,8 @@ The full license texts are kept in [`licenses/`](licenses/). - **[Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)** by **Alibaba-NLP / Tongyi Lab** — the multi-step deep-research agent pipeline. Copyright © Alibaba-NLP / Tongyi Lab. **Apache-2.0.** Adapted for Odysseus's - Deep Research feature (`api/research_*.py`, `routes/research_routes.py`, - `services/search/`). Full text in + Deep Research feature (`services/research/`, `src/research_handler.py`, + `routes/research_routes.py`, `services/search/`). Full text in [`licenses/DeepResearch-Apache-2.0.txt`](licenses/DeepResearch-Apache-2.0.txt). --- @@ -47,7 +47,7 @@ just composed. 
| Service | Image | Purpose | License | |---|---|---|---| -| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:latest` | Default metasearch backend | AGPL-3.0 | +| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:2026.5.31-7159b8aed` (pinned tag; see compose) | Default metasearch backend | AGPL-3.0 | | [ChromaDB](https://github.com/chroma-core/chroma) | `chromadb/chroma:latest` | Vector store for memory / RAG | Apache-2.0 | | [ntfy](https://github.com/binwiederhier/ntfy) | `binwiederhier/ntfy` | Push notifications (self-hosted reminders) | Apache-2.0 / GPL-2.0 | @@ -118,6 +118,7 @@ Core (`requirements.txt`) and optional (`requirements-optional.txt`): | croniter | MIT | | pytest / pytest-asyncio | MIT / Apache-2.0 | | duckduckgo-search (optional) | MIT | +| markitdown (optional — Office/EPUB text extraction) | MIT | | **PyMuPDF** *(optional — form-filling only)* | **AGPL-3.0** — see note below | ## Companion services (interoperated with, not bundled) @@ -152,6 +153,9 @@ concerns from earlier are resolved: deployment (Artifex also sells a commercial PyMuPDF license that lifts this). - **`caldav`** (Python lib) is **dual-licensed GPL-3.0-or-later OR Apache-2.0**. Odysseus uses it under **Apache-2.0**, which is permissive and MIT-compatible. +- **`markitdown`** (Microsoft) is **MIT** and used only as an *optional* dependency for Office/EPUB text + extraction (`src/markitdown_runtime.py`), lazy-imported with graceful fallback — the MIT core runs without + it. The cloud `az-doc-intel` extra is deliberately **not** installed, keeping extraction fully local. --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 01ed77b71..bdca56bb6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -57,12 +57,32 @@ Good pull requests usually include: - A short explanation of the bug or feature. - The files or areas changed. -- Manual test steps or automated test results. 
+- Manual test steps or automated test results from running the actual app, not just the test suite. - Screenshots or short recordings for UI changes. - Links to related issues, for example `Fixes #123`. Please keep PRs small. Large PRs that mix unrelated cleanup, formatting, refactors, and behavior changes are much harder to review. +> **Auto-generated PRs.** If you are running an LLM agent (Devin, Cursor, OpenHands, Claude Code, etc.) against this repo: please open an issue describing the problem first instead of opening a PR directly. Bulk agent-generated PRs that don't match the project's visual style or contribution format will be closed without review, even when the underlying fix is correct. + +## Style and visual changes + +Odysseus has an intentional visual style. PRs that ignore it will be closed without merge, no matter how correct the underlying code is. + +Before submitting any change that affects what the app looks like — buttons, icons, fonts, colors, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — please: + +1. **Run the app locally** and view the change in a browser. Type-checks and unit tests are not enough. +2. **Attach a screenshot or short clip** of the change in the running app. Add a mobile screenshot too if the change affects mobile. +3. **Match the existing visual language.** Specifically: + - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, …). Do not introduce new color values, font sizes, or spacing units. + - Reuse existing button, input, card, and border classes. Don't invent parallel styling for similar widgets. + - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text. + - Monospaced font (`Fira Code`) for primary UI text. Don't override. + - Dark theme is the default; any light-mode work goes through the existing theme system, not hard-coded. +4. 
**Don't add parallel components.** If a similar widget already exists in the app, extend it instead of writing a new one. + +If you are unsure whether a change is "visual," it is. Default to attaching a screenshot. + ## Issue Reports For bugs, include: diff --git a/README.md b/README.md index 64c54b5e8..d02c13964 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,10 @@ # Odysseus + +``` ─────────────────────────────────────────────── ⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ Odysseus vers. 1.0 ─────────────────────────────────────────────── +``` ![Odysseus](docs/odysseus.jpg) @@ -77,8 +80,10 @@ python setup.py python -m uvicorn app:app --host 127.0.0.1 --port 7000 ``` Requirements: Python 3.11+. Cookbook also needs `tmux` for background model -downloads and serves. Use `--host 0.0.0.0` only when you intentionally want -LAN/reverse-proxy access. +downloads and serves. The app itself is lightweight; local model serving is the +heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can +connect to API or remote model servers instead. Use `--host 0.0.0.0` only when +you intentionally want LAN/reverse-proxy access. ### Apple Silicon Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an @@ -90,7 +95,18 @@ cd odysseus ./start-macos.sh ``` -It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper: +It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces: + +```bash +ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh +# then open http://<your-mac-ip>:7860 +``` + +The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT` +set there are picked up automatically without a command-line override each run. + +Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not +expose this port directly to the public internet.
To build a clickable app wrapper: ```bash ./build-macos-app.sh @@ -117,21 +133,82 @@ Odysseus SSH key and add the public key to the remote server's ssh-copy-id -i data/ssh/id_ed25519.pub user@server ``` -**NVIDIA / AMD Docker GPU overlays.** Install the host runtime first, then add -one of these to `.env`: +**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can +only detect GPUs that Docker exposes to the container — if the host runtime or +device passthrough is not configured, Cookbook sees the iGPU, another card, or +CPU instead of your intended GPU. + +For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can +optionally install the host runtime or update `.env`. + +```bash +# Read-only diagnostic (default — installs nothing, never edits .env): +scripts/check-docker-gpu.sh + +# Print OS-specific install commands without running them: +scripts/check-docker-gpu.sh --print-install-commands + +# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo): +scripts/check-docker-gpu.sh --install-nvidia-toolkit + +# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working): +scripts/check-docker-gpu.sh --enable-nvidia-overlay + +# Full assisted setup — install toolkit, then enable overlay if passthrough works: +scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay +``` + +Safety notes: +- The app never installs host GPU runtime automatically. +- The app never edits `.env` automatically. +- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed, + and only after GPU passthrough succeeds. `--yes` skips prompts but does not + bypass the passthrough gate. +- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by + Git and the Docker build context. 
+ +To enable manually without the script, add this to `.env`: ```bash COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml -COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml ``` -Verify with: +**AMD / ROCm.** AMD setup is a read-only diagnostic plus a manual `.env` edit. Run: ```bash -docker compose exec odysseus nvidia-smi -L -docker compose exec odysseus rocm-smi +scripts/check-docker-amd-gpu.sh ``` +Then add the reported values to `.env`, replacing the `RENDER_GID` value with your host's +numeric render group ID: + +```bash +COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml +RENDER_GID=989 +``` + +For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`. + +Verify after enabling either overlay: + +```bash +docker compose exec odysseus nvidia-smi -L # NVIDIA +docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*' # AMD +``` + +> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the +> container confirms Docker GPU access, but llama.cpp also needs `cudart` and +> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart +> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or +> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue — +> not a Docker passthrough failure. Re-install the serve engine via +> **Cookbook → Dependencies** to get a CUDA-enabled build. +> +> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside +> the container confirms device passthrough, not ROCm userspace or a +> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected +> inside the slim Odysseus image.
+ **Ollama with Docker.** If Ollama runs on the host, add this endpoint in Settings: @@ -145,6 +222,13 @@ Ollama must listen outside its own loopback interface: OLLAMA_HOST=0.0.0.0:11434 ollama serve ``` +This connects Odysseus in Docker to an Ollama server that is already running on +your host machine; it does not start Ollama inside the container. +`host.docker.internal` is Docker's hostname for the host machine from inside the +container. Cookbook **Serve** is a separate workflow for serving downloaded +models through Odysseus/llama.cpp, so Windows users with an existing Ollama +install usually only need to add the endpoint in Settings. + **Useful checks.** ```bash @@ -176,13 +260,16 @@ Or do it by hand: ```powershell git clone https://github.com/pewdiepie-archdaemon/odysseus.git cd odysseus -python -m venv venv +py -3.11 -m venv venv venv\Scripts\Activate.ps1 pip install -r requirements.txt python setup.py python -m uvicorn app:app --host 127.0.0.1 --port 7000 ``` +If Python 3.11 is not installed, use `py -3.12` (or another installed +3.11+ version) for the venv step. + **Requirements:** Python 3.11+. The core app (chat, agent, memory, documents, email, calendar, deep research) runs fully native. For full **Cookbook** background model downloads and the agent shell tool, also install @@ -194,31 +281,77 @@ Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Window Open `http://localhost:7000`, log in with the generated admin password, and configure everything else inside **Settings**. +## Troubleshooting & Advanced Setup + +### `chromadb-client` conflicts with embedded ChromaDB +If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
+ +**Fix:** uninstall `chromadb-client` and force-reinstall the full package: +```bash +./venv/bin/pip uninstall chromadb-client -y +./venv/bin/pip install --force-reinstall chromadb +``` + +### HTTPS + LAN/Tailscale exposure +To expose Odysseus on a local network or Tailscale with HTTPS: +1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`). +2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert): + ```bash + mkcert -install + mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip + ``` +3. Run `uvicorn` with the generated certs: + ```bash + python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem + ``` +4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings). + +### Optional Dependencies +`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default. + +| Package | Feature unlocked | +|---------|-----------------| +| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. | +| `duckduckgo-search` | DuckDuckGo as a search provider option. | +| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) | +| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). | + ## Security Notes Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console. - Keep `AUTH_ENABLED=true` for any network-accessible deployment. -- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy. 
-- Keep `data/`, `.env`, logs, databases, and uploaded/generated media out of Git. They are ignored by default. +- Keep `LOCALHOST_BYPASS=false` outside local development. +- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway. +- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer. +- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default. - Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin. - Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment. - Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log. - If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones. - Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access. +- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer. - Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged. -### Putting it behind HTTPS -Odysseus serves plain HTTP on its port. 
That's fine for `localhost` and trusted LAN/VPN use, but browsers will warn ("Password fields present on an insecure page") and the login + API tokens travel in cleartext. For anything reachable outside your machine — including a Tailscale IP shared with other devices — put a TLS-terminating reverse proxy in front. +### Private or proxied deployments +Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is: -Shortest path with [Caddy](https://caddyserver.com/) (auto-renews Let's Encrypt certs): +1. Keep Odysseus on localhost, for example `127.0.0.1:7000`. +2. Terminate HTTPS at a trusted reverse proxy or private access gateway. +3. Put the authenticated Odysseus web/API entrypoint behind that layer. +4. Keep raw service and model ports internal-only. -```caddy -odysseus.example.com { - reverse_proxy localhost:7000 -} -``` +Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`. -For a LAN-only Tailscale deployment, Caddy + [tailscale-cert](https://caddyserver.com/docs/caddyfile/options#auto-https) or the built-in MagicDNS HTTPS feature both work. nginx/Traefik configs are similar — proxy `localhost:7000`, terminate TLS at the proxy. Once that's in place, the browser warning goes away and your login is encrypted. +Common internal-only ports from the default docs/compose setup: + +| Port | Service | +|---|---| +| `7000` | Odysseus raw app port | +| `8080` | SearXNG | +| `8091` | ntfy | +| `8100` | ChromaDB host port for manual/compose access | +| `11434` | Ollama | +| `8000-8020` | Common local model/provider APIs | ## Contributing Help is welcome. 
The best entry points are fresh-install testing, provider setup @@ -241,6 +374,7 @@ Key settings: | `APP_PORT` | `7000` | Docker Compose host port for the web UI. | | `AUTH_ENABLED` | `true` | Enable/disable login | | `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. | +| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. | | `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string | | `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. | | `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. | diff --git a/ROADMAP.md b/ROADMAP.md index aa79c3088..7c59c1f6a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,6 +1,6 @@ # Roadmap / Help Wanted -Odysseus is on a voyage, but not home yet. It works great for me (lol), but this is ship is moving fast and feedback/help would be appreciated! (I dont know what I'm doing hlep). +Odysseus is on a voyage, but not home yet. It works great for me (lol), but this ship is moving fast and feedback/help would be appreciated! (I don't know what I'm doing, help). If you see weird CSS, strange layout behavior, or a suspiciously murky corner of the codebase, you are probably right to stay away. @@ -8,25 +8,60 @@ the codebase, you are probably right to stay away. ## High Priority - SQUASH BUGS -- Fresh Docker install smoke tests on Linux, macOS, and Windows!! +- Fresh install smoke tests on Linux, macOS, and Windows. Docker, native Python, + and WSL all need coverage. - Integration audit: do integrations even work? Confirm what works, what needs setup docs, and what should be removed or hidden. - Self-host troubleshooting cookbook. 
Document the weird 30-second fixes that otherwise become 30-minute searches: Dovecot cleartext auth for local stacks, ntfy Android Instant Delivery for non-ntfy.sh servers, clipboard limits on plain-HTTP Tailscale URLs, Radicale collection URLs, and similar traps. - Cookbook reliability on other computers. This is probably the area most likely to need work across different machines, GPUs, drivers, shells, and Python environments. -- Tile/window management correctness. I had to brute force my way a bit here, I'm aware, popups, dropdowns, and fixed-position UI inside transformed modals can land in the wrong place. -- Esc button, it's small but a lot of windows that arent still close on esc and alot of them doesnt. -- Skill audit, how does your model respond to skill injection, does it follow? Does its parsing miss? +- Cookbook SGLang support across platforms. Make sure SGLang setup/serve works + predictably on Linux, Windows/WSL, macOS where possible, Docker, and common + NVIDIA/AMD hardware paths. +- Deep Research model presets by hardware. Recommend approved model/parameter + profiles for small, medium, and large local setups so people with different + hardware can use Deep Research without guessing. Surface this either in Deep + Research settings or as a Cookbook scan/dropdown suggestion. +- Cookbook model scan/download ranking. Prioritize newer architectures and + better hardware-fit models instead of scoring everything almost the same. + Ranking should account for architecture age, quant format, VRAM/RAM fit, + backend support, vision/mmproj requirements, and likely serve reliability. +- Cookbook error feedback and logging. Failed downloads, dependency installs, + preflights, and serve jobs should show the actual command/output/error in the + UI, with copyable logs and clear next steps instead of just "crashed". +- Agent prompt/context bloat. 
Agent mode is too heavy for smaller local models: + tool schemas, skills, memory, documents, and instructions can eat the context + before the user request really starts. We need slimmer prompts, better tool + selection, smaller default tool sets, and clearer guidance for models with + 4k/8k/16k context windows. +- Skill/tool prompt-injection audit. User-editable skills, notes, documents, + fetched pages, and memories should be treated as untrusted data. Keep testing + whether models follow malicious instructions from those surfaces. - Better degraded-state reporting for ChromaDB, SearXNG, email, ntfy, and provider probes. +- Email performance audit. Fetching, searching, opening, deleting, and sending + email can feel slow, especially over IMAP/SMTP providers with high latency. + Need someone who knows mail performance to profile the current flow, identify + whether the bottleneck is IMAP folder select/fetch, cache invalidation, + attachment/body loading, SMTP handshakes, or frontend refresh behavior, then + propose safer caching/prefetch/batching without breaking multi-account state. - Provider setup/probing audit for Anthropic, Gemini, Groq, xAI, OpenRouter, OpenAI, and DeepSeek. ## Refactor Targets - CSS cleanup. `static/style.css` basically Calypso's island atm. - Tour core helper. The onboarding tours have too much copy-pasted scaffolding; promote a shared `tour-core.js` helper before adding more tours. +- Modal/window positioning cleanup. Some window controls have improved, but the + underlying popup/dropdown/fixed-position behavior is still too fragile. - Mobile media override discoverability. A lot of "CSS did not move" bugs are mobile `@media` overrides of the same selector; comments or linting around desktop/mobile paired rules would help. - Dead code pass for old routes, stale feature flags, and unused UI states. ## Frontend +- Expand the Editor for quicker, more robust everyday use. 
Better file/document + handling, smoother window behavior, clearer save/export flows, stronger image + editing affordances, and fewer brittle edge cases. +- Better AI integration for Notes and Todos. Notes should be easier for the + agent to read, update, summarize, and turn into actions. Todos should be + assignable to an agent from the UI, possibly through a button, task action, + or dedicated skill/tool flow. - Mobile gallery/editor polish. Easier to launch/download inpaint model or any missing pieces. - Accessibility pass: keyboard navigation, focus states, contrast, reduced motion. - Improve empty states and error messages on fresh installs. diff --git a/SECURITY.md b/SECURITY.md index 2cca34be9..1fa5b0b3b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -8,16 +8,20 @@ Security fixes are handled on the default branch until formal releases are cut. ## Deployment Guidance -- Keep `AUTH_ENABLED=true`. +- Keep `AUTH_ENABLED=true` for any network-accessible deployment. +- Keep `LOCALHOST_BYPASS=false` outside local development. +- Set `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway. - Use HTTPS when exposing the app beyond localhost. -- Put the app behind a trusted reverse proxy or private network. -- Protect `.env`, `data/`, logs, uploaded files, generated media, and database files. +- Put the authenticated Odysseus web/API entrypoint behind a trusted reverse proxy or private access layer such as Cloudflare Access, Tailscale, or a VPN. +- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. +- Protect `.env`, `data/`, `logs/`, uploads, generated media, backups, auth/session files, database files, API keys, and model/provider tokens. - Disable open signup unless you intentionally want new accounts. - Keep demo/test users non-admin, and remove them entirely on serious deployments. - Give admin accounts strong passwords and enable 2FA where possible. 
- Leave high-risk agent tools restricted to admins: shell, Python, file read/write, email send/read, MCP, app API, task/skill/memory management, settings, tokens, and model serving. - Rotate API keys, webhook secrets, and Odysseus API tokens if they appear in logs, screenshots, demos, or shared chats. - Treat shell, model-serving, MCP, email, calendar, and vault features as privileged admin functionality. +- Common internal-only ports are Odysseus `7000`, SearXNG `8080`, ntfy `8091`, ChromaDB `8100`, Ollama `11434`, and local model/provider APIs such as `8000-8020`. ## Publishing A Fork @@ -29,7 +33,7 @@ git check-ignore -v .env data/auth.json data/app.db logs/compound.log odysseus.d git grep -n -I -E "(sk-[A-Za-z0-9_-]{20,}|xox[baprs]-|AIza[0-9A-Za-z_-]{20,}|Bearer [A-Za-z0-9._~+/-]{20,})" -- . ':!static/lib/**' ':!package-lock.json' ``` -Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `data/` contents, local databases, uploaded files, generated media, logs, backups, API keys, password hashes, or personal documents. +Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `.env` values, `data/` contents, local databases, uploaded files, generated media, logs, backups, auth/session files, API keys, model/provider tokens, password hashes, or personal documents. ## Reporting diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md new file mode 100644 index 000000000..48665a61d --- /dev/null +++ b/THREAT_MODEL.md @@ -0,0 +1,81 @@ +# Threat Model + +Odysseus is a **self-hosted AI workspace with privileged local access**. This document states the trust boundary so contributors can reason about security decisions without reading through the full auth and middleware stack. + +## Trust Boundary + +Odysseus is designed for **trusted users on a private network**, not public exposure. The README describes it as "treat it like an admin console" — that framing is accurate. 
A logged-in admin can execute shell commands, read and write files, send email, and control model serving. This is intentional. The threat model does not try to prevent admins from doing these things. It does try to prevent: + +- Unauthenticated access +- Non-admins reaching admin-only capabilities +- The AI agent acting on instructions injected through untrusted content (web results, emails, fetched pages, memories) +- Internal services (ChromaDB, Ollama, SearXNG, etc.) being reachable from outside the host + +## Roles and Capabilities + +| Capability | Admin | Non-admin (default) | +|---|---|---| +| Chat with agent | ✓ | ✓ | +| Browser tool | ✓ | ✓ | +| Documents | ✓ | ✓ | +| Research mode | ✓ | ✓ | +| Image generation | ✓ | ✓ | +| Memory management | ✓ | ✓ | +| Shell / Python execution | ✓ | ✗ | +| File read / write | ✓ | ✗ | +| Email send / read | ✓ | ✗ | +| MCP tools | ✓ | ✗ | +| Calendar management | ✓ | ✗ | +| Token / webhook management | ✓ | ✗ | +| Model serving | ✓ | ✗ | +| Vault | ✓ | ✗ | +| Settings | ✓ | ✗ | + +Non-admin defaults are in `core/auth.py:DEFAULT_PRIVILEGES`. Tool enforcement is in `src/tool_security.py:NON_ADMIN_BLOCKED_TOOLS`. Any tool whose name starts with `mcp__` is also blocked for non-admins. Admins always get full access regardless of stored privilege values. + +## Authentication + +- **Sessions:** bcrypt passwords, 7-day session tokens stored atomically in `data/sessions.json` via `core/atomic_io.py`. +- **2FA:** TOTP with 8 single-use backup codes. Verified after password check, before session issuance. +- **Reserved usernames:** `internal-tool`, `api`, `demo`, `system` cannot be registered or renamed into. Defined in `core/auth.py:RESERVED_USERNAMES`. + - `internal-tool` is security-critical: `core/middleware.py:require_admin` treats any request where `request.state.current_user == "internal-tool"` as the in-process tool loopback and grants admin unconditionally. 
A real account with that name would silently pass every `require_admin` check. +- **Orphan sessions:** `validate_token` re-checks that the user record still exists on every call. A deleted user's cookie is dropped on next request rather than continuing to authenticate. + +## Internal Tool Loopback + +Agent tool calls reach admin-gated HTTP routes over an in-process HTTP loopback. The mechanism: + +1. At app startup, `core/middleware.py` generates a random `INTERNAL_TOOL_TOKEN` via `secrets.token_hex(32)`. It is never persisted and never sent to clients. +2. Loopback requests carry `X-Odysseus-Internal-Token: <token>` or have `request.state.current_user` already set to `"internal-tool"` by the auth middleware. +3. `require_admin` recognises either signal and grants access without checking the session user. + +The agent may be running in a non-admin user's session, but tool dispatch first calls `src/tool_security.py:owner_is_admin_or_single_user` to verify the session owner is an admin before issuing any loopback call. Non-admin users cannot invoke admin tools even via the agent. + +## Prompt-Injection Hardening + +External content that reaches the LLM is treated as untrusted via `src/prompt_security.py`: + +- `untrusted_context_message(label, content)` wraps the content in a `user`-role message with a header block instructing the model not to follow instructions inside it. Content goes in as data, not as a system instruction. +- `UNTRUSTED_CONTEXT_POLICY` is a system-prompt preamble that states the same policy at the top of every session where untrusted data may appear. + +**Untrusted surfaces that must go through this wrapper:** web search results, fetched URLs, emails (read), saved memories, skill text, notes, and any tool output sourced from outside the server. Injecting untrusted content directly into the system role is a security bug.
+ +## Security Headers + +`core/middleware.py:SecurityHeadersMiddleware` sets headers on every response: + +- `X-Frame-Options: DENY` + `frame-ancestors 'none'` on all routes except tool-render iframes (which are sandboxed at the HTML level). +- `X-Content-Type-Options: nosniff` and `Referrer-Policy: no-referrer` everywhere. +- **CSP:** nonce-based `script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net`. `style-src 'unsafe-inline'` is intentionally kept — `static/index.html` ships inline ` +
+

Pair a device

+

Generate a one-time pairing code (a chat-scoped API token) for a LAN client.

+
+ +
+

Admin only. Each code mints a new token, shown once. Manage or revoke under Settings → API tokens.

+
""" + return HTMLResponse(page) + + @router.post("/pair") + def pair_create(request: Request): + """Mint a pairing code. Admin-cookie only; CSRF-safe because the + SameSite=Lax session cookie is not sent on a cross-site POST (same + protection as POST /api/tokens). Minting invalidates the token cache so + the code works immediately, no restart. `?format=json` returns the + payload for an in-app pairing screen.""" + require_admin(request) + owner = get_current_user(request) + invalidate = getattr(request.app.state, "invalidate_token_cache", None) + token_id, raw_token = mint_pairing_token(owner, invalidate) + + hosts = _pairing.lan_ip_candidates() + host = hosts[0] if hosts else "127.0.0.1" + port = request.url.port or _pairing.default_port() + payload = _pairing.pairing_payload(host, port, raw_token) + qr = _pairing.pairing_qr_png_data_uri(payload) + qr_ok = bool(qr and qr.startswith("data:image/png;base64,")) + + if (request.query_params.get("format") or "").lower() == "json": + return { + "host": host, + "port": port, + "token": raw_token, + "token_id": token_id, + "hosts": hosts, + "payload": payload, + "qr": qr if qr_ok else None, + } + + import json as _json + payload_json = _json.dumps(payload, separators=(",", ":")) + # Only ever emit a known PNG data-URI into the src; every other value is + # html.escaped. + qr_block = ( + f'Pairing QR' + if qr_ok else "

QR rendering unavailable -- enter the details manually.

" + ) + page = f""" + +Pairing code + +
+

Pairing code

+ {qr_block} +
Host: {html.escape(host)}
+
Port: {html.escape(str(port))}
+
Token: {html.escape(raw_token)}
+
Payload: {html.escape(payload_json)}
+

Shown once. This grants chat access to your Odysseus; revoke it + in Settings → API tokens (id {html.escape(token_id)}). The + device must be on the same network, and the server must bind to your LAN.

+
""" + return HTMLResponse(page) + + return router diff --git a/core/auth.py b/core/auth.py index 1e68a721b..54635d829 100644 --- a/core/auth.py +++ b/core/auth.py @@ -266,7 +266,8 @@ class AuthManager: renamed_sessions = 0 with self._sessions_lock: for sess in self._sessions.values(): - if (sess or {}).get("username") == old_username: + sess_user = str((sess or {}).get("username") or "").strip().lower() + if sess_user == old_username: sess["username"] = new_username renamed_sessions += 1 if renamed_sessions: @@ -375,7 +376,10 @@ class AuthManager: return True # 2FA not enabled, always pass secret = user.get("totp_secret") if not secret: - return True + # 2FA is enabled but no secret is stored (corrupt/partially-written + # auth.json). Fail closed — returning True here bypassed the second + # factor entirely. + return False # Check backup codes first backup = user.get("totp_backup_codes", []) if code in backup: diff --git a/core/database.py b/core/database.py index 7fcc0f388..293a30386 100644 --- a/core/database.py +++ b/core/database.py @@ -1,7 +1,9 @@ import os import logging +import sqlite3 from datetime import datetime -from sqlalchemy import create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text +from sqlalchemy import event, create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text +from sqlalchemy.engine import Engine from sqlalchemy.types import TypeDecorator from sqlalchemy.ext.declarative import declarative_base, declared_attr from sqlalchemy.orm import relationship, sessionmaker, backref @@ -34,6 +36,18 @@ engine = create_engine( SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) +# Listening on the Engine class ensures this listener fires for all Engine +# instances created within the process, not just the primary application engine. 
+# The isinstance(sqlite3.Connection) check ensures that this PRAGMA foreign_keys=ON +# configuration remains a no-op when using non-SQLite database backends. +@event.listens_for(Engine, "connect") +def set_sqlite_pragma(dbapi_connection, connection_record): + if isinstance(dbapi_connection, sqlite3.Connection): + cursor = dbapi_connection.cursor() + cursor.execute("PRAGMA foreign_keys=ON") + cursor.close() + + class EncryptedText(TypeDecorator): """Text column transparently encrypted at rest via src.secret_storage. @@ -298,6 +312,7 @@ class EmailAccount(TimestampMixin, Base): # SMTP (sending) smtp_host = Column(String, default="") smtp_port = Column(Integer, default=465) + smtp_security = Column(String, default="ssl") # ssl | starttls | none smtp_user = Column(String, default="") smtp_password = Column(String, default="") @@ -1483,6 +1498,10 @@ def _migrate_seed_email_account(): logging.getLogger(__name__).warning(f"seed email account migration: {e}") +# WARNING: Foreign-key enforcement is enabled globally for all SQLite connections. +# Any future migrations or schema changes that temporarily violate foreign-key +# constraints will fail. To perform such operations, foreign_keys must be +# temporarily disabled around the migration workflow. def init_db(): """ Initialize the database by creating all tables. 
@@ -1517,6 +1536,7 @@ def init_db(): _migrate_drop_ping_notes_tasks() _migrate_add_crew_member_id() _migrate_add_assistant_columns() + _migrate_add_email_smtp_security() _migrate_seed_email_account() _migrate_add_calendar_metadata() _migrate_add_calendar_is_utc() @@ -1525,6 +1545,32 @@ def init_db(): _migrate_encrypt_endpoint_keys() +def _migrate_add_email_smtp_security(): + """Add explicit SMTP security mode for Proton Bridge/custom local SMTP.""" + import sqlite3 + db_path = DATABASE_URL.replace("sqlite:///", "") + if not os.path.exists(db_path): + return + try: + conn = sqlite3.connect(db_path) + cursor = conn.execute("PRAGMA table_info(email_accounts)") + columns = [row[1] for row in cursor.fetchall()] + if columns and "smtp_security" not in columns: + conn.execute("ALTER TABLE email_accounts ADD COLUMN smtp_security TEXT DEFAULT 'ssl'") + conn.execute( + "UPDATE email_accounts SET smtp_security = CASE " + "WHEN COALESCE(smtp_port, 465) = 587 THEN 'starttls' " + "WHEN COALESCE(smtp_port, 465) = 465 THEN 'ssl' " + "ELSE 'ssl' END " + "WHERE smtp_security IS NULL OR smtp_security = ''" + ) + conn.commit() + logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts") + conn.close() + except Exception as e: + logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}") + + def _migrate_encrypt_endpoint_keys(): """Encrypt any plaintext provider API keys in model_endpoints. Idempotent; raw SQL so the EncryptedText decorator isn't applied twice.""" diff --git a/core/platform_compat.py b/core/platform_compat.py index 01ebe325e..f9712446f 100644 --- a/core/platform_compat.py +++ b/core/platform_compat.py @@ -14,6 +14,7 @@ Design rules: from __future__ import annotations import os +import ntpath import shutil import subprocess from pathlib import Path @@ -134,11 +135,40 @@ _BASH_CACHE: Optional[str] = None _BASH_PROBED = False # Common Git-for-Windows install locations to probe when bash isn't on PATH. 
-_WINDOWS_BASH_FALLBACKS = ( - r"C:\Program Files\Git\bin\bash.exe", - r"C:\Program Files\Git\usr\bin\bash.exe", - r"C:\Program Files (x86)\Git\bin\bash.exe", +_WINDOWS_BASH_ROOT_ENV_VARS = ( + "ProgramFiles", + "ProgramW6432", + "ProgramFiles(x86)", + "LocalAppData", ) +_WINDOWS_BASH_DEFAULT_ROOTS = ( + r"C:\Program Files\Git", + r"C:\Program Files (x86)\Git", +) +_WINDOWS_BASH_RELATIVE_PATHS = ( + ("bin", "bash.exe"), + ("usr", "bin", "bash.exe"), +) + + +def _windows_bash_fallbacks() -> List[str]: + roots: List[str] = [] + for env_name in _WINDOWS_BASH_ROOT_ENV_VARS: + base = os.environ.get(env_name) + if base: + roots.append(ntpath.join(base, "Git")) + roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS) + + paths: List[str] = [] + seen = set() + for root in roots: + for rel in _WINDOWS_BASH_RELATIVE_PATHS: + path = ntpath.join(root, *rel) + key = path.lower() + if key not in seen: + seen.add(key) + paths.append(path) + return paths def find_bash() -> Optional[str]: @@ -153,9 +183,9 @@ def find_bash() -> Optional[str]: if _BASH_PROBED: return _BASH_CACHE _BASH_PROBED = True - found = shutil.which("bash") + found = which_tool("bash") if not found and IS_WINDOWS: - for cand in _WINDOWS_BASH_FALLBACKS: + for cand in _windows_bash_fallbacks(): if os.path.exists(cand): found = cand break diff --git a/core/session_manager.py b/core/session_manager.py index e9a274097..6a884f88f 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -29,6 +29,21 @@ def _message_timestamp_iso(value: Optional[datetime]) -> Optional[str]: return value.isoformat().replace("+00:00", "Z") +def _parse_msg_content(raw): + """Parse message content from DB — deserialises JSON arrays back to lists + (multimodal content with image/audio attachments).""" + if isinstance(raw, list): + return raw + if isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw: + try: + parsed = json.loads(raw) + if isinstance(parsed, list) and all(isinstance(p, dict) for p in parsed): + return parsed + 
except (json.JSONDecodeError, ValueError): + pass + return raw + + class SessionManager: """ Manages chat sessions with database persistence. @@ -119,7 +134,7 @@ class SessionManager: meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp)) history.append(ChatMessage( role=db_msg.role, - content=db_msg.content, + content=_parse_msg_content(db_msg.content), metadata=meta, )) else: @@ -134,7 +149,7 @@ class SessionManager: meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp)) history.append(ChatMessage( role=db_msg.role, - content=db_msg.content, + content=_parse_msg_content(db_msg.content), metadata=meta, )) @@ -187,30 +202,43 @@ class SessionManager: """Persist a single message to the database.""" db = SessionLocal() try: + db_session = db.query(DbSession).filter(DbSession.id == session_id).first() + if db_session is None: + # A stream/tool callback can outlive a session delete. Do not + # create a chat_messages row with no parent session; also drop + # any stale cached session so later writes fail closed too. + self.sessions.pop(session_id, None) + logger.warning("Dropping message for deleted session %s", session_id) + return + msg_id = str(uuid.uuid4()) msg_time = datetime.utcnow() if message.metadata is None: message.metadata = {} message.metadata.setdefault('timestamp', _message_timestamp_iso(msg_time)) + # Multimodal content (image/audio attachments) is a list — serialize + # to JSON so the Text column can store it. On reload, _db_to_session + # detects the JSON-array prefix and parses it back. 
+ _content = message.content + if isinstance(_content, list): + _content = json.dumps(_content) db_message = DbChatMessage( id=msg_id, session_id=session_id, role=message.role, - content=message.content, + content=_content, meta_data=json.dumps(message.metadata) if message.metadata else None, timestamp=msg_time, ) db.add(db_message) - db_session = db.query(DbSession).filter(DbSession.id == session_id).first() - if db_session: - db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0 - _now = datetime.now(timezone.utc) - db_session.last_accessed = _now - # Clean "last conversation" timestamp — only bumped here on a - # real message persist, so it powers an accurate "Last active" - # sort that ignores renames / model swaps / mere opens. - db_session.last_message_at = _now + db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0 + _now = datetime.now(timezone.utc) + db_session.last_accessed = _now + # Clean "last conversation" timestamp — only bumped here on a + # real message persist, so it powers an accurate "Last active" + # sort that ignores renames / model swaps / mere opens. + db_session.last_message_at = _now db.commit() @@ -276,7 +304,15 @@ class SessionManager: id=msg_id, session_id=session_id, role=message.role, - content=message.content, + # Multimodal content (image/audio attachments) is a list; + # serialize to JSON so the Text column round-trips via + # _parse_msg_content. Storing the raw list let SQLAlchemy + # bind its single-quoted repr, which _parse_msg_content + # cannot parse (it looks for double-quoted "type"), so the + # attachment was destroyed on reload. Mirrors _persist_message. 
+ content=(json.dumps(message.content) + if isinstance(message.content, list) + else message.content), meta_data=json.dumps(message.metadata) if message.metadata else None, timestamp=now + timedelta(microseconds=i), ) @@ -466,11 +502,17 @@ class SessionManager: db_session = db.query(DbSession).filter(DbSession.id == session_id).first() if db_session: db.delete(db_session) + + # Drop the in-memory copy even when there is no DB row. A "ghost" + # session lives only here (never persisted, or its row was removed + # out-of-band); without this it can never be cleared and keeps + # 404ing on every operation (issue #1044). + removed_in_memory = self.sessions.pop(session_id, None) is not None + + if db_session or removed_in_memory: + # Commit the document-detach / message-delete above (a no-op when + # the ghost had no rows) together with the session delete. db.commit() - - if session_id in self.sessions: - del self.sessions[session_id] - logger.info(f"Deleted session {session_id}") return True return False diff --git a/docker-compose.yml b/docker-compose.yml index f91017b86..f3a8dcc49 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,28 +4,53 @@ services: ports: - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" volumes: - - ./data:/app/data - - ./logs:/app/logs + - ./data:/app/data:z + - ./logs:/app/logs:z # Cookbook remote-server SSH identity. Odysseus can generate a key here; # add the shown public key to each remote server's authorized_keys. - - ./data/ssh:/app/.ssh + - ./data/ssh:/app/.ssh:z # Cookbook local model cache. Inside Docker, "Local" means the Odysseus # container, so persist its HuggingFace cache under ./data/huggingface. - - ./data/huggingface:/app/.cache/huggingface + - ./data/huggingface:/app/.cache/huggingface:z # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) # land under /app/.local for the odysseus user. Persist them so a # container recreate does not silently remove installed serve engines. 
- - ./data/local:/app/.local + - ./data/local:/app/.local:z extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. - "host.docker.internal:host-gateway" - env_file: - - .env environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} - SEARXNG_INSTANCE=http://searxng:8080 - CHROMADB_HOST=chromadb - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} # PUID / PGID — the user/group the container drops to before # running uvicorn (entrypoint also chowns /app/data + /app/logs # to match, so bind-mounted files stay editable from the host). 
@@ -54,7 +79,12 @@ services: restart: unless-stopped searxng: - image: docker.io/searxng/searxng:latest + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed entrypoint: - /bin/sh - -c @@ -72,10 +102,24 @@ services: - "127.0.0.1:8080:8080" volumes: - searxng-data:/etc/searxng - - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z environment: - SEARXNG_BASE_URL=http://localhost:8080/ - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE healthcheck: test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] interval: 5s diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index a378ff234..668018ac1 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -76,6 +76,15 @@ done # nvcc" even when the GPU itself is fully visible to the container. export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}" +# Make Cookbook-installed Python CLIs visible after `pip install --user`. 
+# vLLM and helper scripts land here because /app is the non-root user's HOME. +export PATH="/app/.local/bin:$PATH" + +# Run first-time setup as the app user so data/ files get the right ownership. +# setup.py is idempotent — skips auth.json / .env if they already exist. +# || true so a setup failure never prevents the container from starting. +gosu "$PUID:$PGID" python /app/setup.py || true + # Drop root and run the actual app. `gosu` is preferred over `su` / # `sudo` because it cleans up the process tree (no extra shell layer) # so signals (SIGTERM from `docker stop`) reach uvicorn directly. diff --git a/docker/gpu.amd.yml b/docker/gpu.amd.yml index 6d427c824..1bda9cfdd 100644 --- a/docker/gpu.amd.yml +++ b/docker/gpu.amd.yml @@ -1,5 +1,6 @@ # AMD ROCm GPU overlay. Enable by setting COMPOSE_FILE in .env: # COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml +# RENDER_GID= # # Requires ROCm drivers on the host (kfd + DRI devices). The host user # running Docker must be in the `video` and `render` groups. diff --git a/docker/gpu.nvidia.yml b/docker/gpu.nvidia.yml index 32f7fb2dc..5590ba439 100644 --- a/docker/gpu.nvidia.yml +++ b/docker/gpu.nvidia.yml @@ -1,6 +1,11 @@ # NVIDIA GPU overlay. Enable by setting COMPOSE_FILE in .env: # COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml # +# Use scripts/check-docker-gpu.sh to diagnose GPU passthrough, optionally +# install the NVIDIA Container Toolkit (Ubuntu/Debian), and write COMPOSE_FILE +# to .env. The script is read-only by default — it installs nothing and never +# edits .env unless explicitly asked. +# # Requires the NVIDIA Container Toolkit on the host. 
# Arch: sudo pacman -S nvidia-container-toolkit # Debian: sudo apt install nvidia-container-toolkit diff --git a/docs/a11y/focus-after.png b/docs/a11y/focus-after.png deleted file mode 100644 index 7c9938a20..000000000 Binary files a/docs/a11y/focus-after.png and /dev/null differ diff --git a/docs/a11y/focus-before.png b/docs/a11y/focus-before.png deleted file mode 100644 index d5cf76b8d..000000000 Binary files a/docs/a11y/focus-before.png and /dev/null differ diff --git a/docs/a11y/login-after.png b/docs/a11y/login-after.png deleted file mode 100644 index cc2571d6f..000000000 Binary files a/docs/a11y/login-after.png and /dev/null differ diff --git a/docs/a11y/login-before.png b/docs/a11y/login-before.png deleted file mode 100644 index bb76ea463..000000000 Binary files a/docs/a11y/login-before.png and /dev/null differ diff --git a/docs/gallery-314-desktop.png b/docs/gallery-314-desktop.png deleted file mode 100644 index ac3d80f11..000000000 Binary files a/docs/gallery-314-desktop.png and /dev/null differ diff --git a/docs/gallery-314-mobile.png b/docs/gallery-314-mobile.png deleted file mode 100644 index 3a3d71a71..000000000 Binary files a/docs/gallery-314-mobile.png and /dev/null differ diff --git a/docs/index.html b/docs/index.html index 8c6a21d89..540237840 100644 --- a/docs/index.html +++ b/docs/index.html @@ -25,7 +25,7 @@ --radius: 8px; } * { box-sizing: border-box; } - html { scroll-behavior: smooth; scroll-snap-type: y mandatory; scroll-padding-top: 60px; } + html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; } /* Each section is a full-viewport "page" with its content centered, so only one shows at a time and the snap is obvious. */ .hero, section { diff --git a/launch-windows.ps1 b/launch-windows.ps1 index 827bfdcb4..88ede8d66 100644 --- a/launch-windows.ps1 +++ b/launch-windows.ps1 @@ -30,23 +30,80 @@ function Fail($msg) { exit 1 } -# 1. 
Locate a Python interpreter (3.11+ recommended) +function Find-GitBash { + $cmd = Get-Command bash -ErrorAction SilentlyContinue + if ($cmd) { return $cmd.Source } + + $roots = @() + foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) { + $base = [Environment]::GetEnvironmentVariable($name) + if ($base) { $roots += (Join-Path $base "Git") } + } + $roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git") + + foreach ($root in ($roots | Select-Object -Unique)) { + foreach ($relative in @("bin\bash.exe", "usr\bin\bash.exe")) { + $candidate = Join-Path $root $relative + if (Test-Path $candidate) { return $candidate } + } + } + return $null +} + +# 1. Locate a Python interpreter (3.11+ required) Write-Step "Checking for Python" +function Get-PythonVersionText($launcher, $launcherArgs) { + try { + return (& $launcher @launcherArgs -c "import sys; print('.'.join(map(str, sys.version_info[:3])))" 2>$null).Trim() + } catch { + return $null + } +} + $pyExe = $null -foreach ($c in @("python", "py")) { - $cmd = Get-Command $c -ErrorAction SilentlyContinue - if ($cmd) { $pyExe = $cmd.Source; break } +$pyArgs = @() +$pyVersion = $null + +$pyLauncher = Get-Command py -ErrorAction SilentlyContinue +if ($pyLauncher) { + foreach ($v in @("-3.13", "-3.12", "-3.11")) { + $ver = Get-PythonVersionText $pyLauncher.Source @($v) + if ($ver) { + $pyExe = $pyLauncher.Source + $pyArgs = @($v) + $pyVersion = $ver + break + } + } } + if (-not $pyExe) { - Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script." 
+ $pythonCmd = Get-Command python -ErrorAction SilentlyContinue + if ($pythonCmd) { + $ver = Get-PythonVersionText $pythonCmd.Source @() + if ($ver) { + $versionParts = $ver.Split('.') + $major = [int]$versionParts[0] + $minor = [int]$versionParts[1] + if ($major -gt 3 -or ($major -eq 3 -and $minor -ge 11)) { + $pyExe = $pythonCmd.Source + $pyVersion = $ver + } + } + } } -Write-Host ("Using Python: " + $pyExe) + +if (-not $pyExe) { + Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script." +} +$pythonLabel = ("Using Python {0}: {1} {2}" -f $pyVersion, $pyExe, ($pyArgs -join ' ')).TrimEnd() +Write-Host $pythonLabel # 2. Create the virtualenv if missing $venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe" if (-not (Test-Path $venvPy)) { Write-Step "Creating virtual environment (venv)" - & $pyExe -m venv venv + & $pyExe @pyArgs -m venv venv if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." } } else { Write-Host "venv already exists - skipping creation." @@ -64,7 +121,7 @@ Write-Step "Running first-time setup" if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." } # 5. Friendly note about Git Bash (full Cookbook / agent-shell parity) -if (-not (Get-Command bash -ErrorAction SilentlyContinue)) { +if (-not (Find-GitBash)) { Write-Host "" Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow Write-Host " The core app works without it. 
For full Cookbook background" -ForegroundColor Yellow diff --git a/mcp_servers/_common.py b/mcp_servers/_common.py index 641c8522d..341bfe64e 100644 --- a/mcp_servers/_common.py +++ b/mcp_servers/_common.py @@ -13,6 +13,10 @@ SEARCH_TIMEOUT = 30 def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: """Truncate text to *limit* characters with a suffix note.""" + if not isinstance(text, str): + # Tool output is occasionally None or a non-string; len(None) would + # raise. Coerce so this shared helper never crashes a tool response. + text = "" if text is None else str(text) if len(text) > limit: return text[:limit] + f"\n... (truncated, {len(text)} chars total)" return text diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index bde4307fe..8438577f7 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -70,10 +70,12 @@ def _list_accounts_raw() -> list: try: conn = sqlite3.connect(str(path)) conn.row_factory = sqlite3.Row - rows = conn.execute(""" + columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()} + smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security" + rows = conn.execute(f""" SELECT id, name, is_default, enabled, imap_host, imap_port, imap_user, imap_password, imap_starttls, - smtp_host, smtp_port, smtp_user, smtp_password, from_address + smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address FROM email_accounts WHERE enabled = 1 ORDER BY is_default DESC, created_at ASC """).fetchall() @@ -145,6 +147,7 @@ def _load_config(account: str | None = None) -> dict: "imap_starttls": os.environ.get("IMAP_STARTTLS", "true").lower() == "true", "smtp_host": os.environ.get("SMTP_HOST", ""), "smtp_port": int(os.environ.get("SMTP_PORT", "465")), + "smtp_security": os.environ.get("SMTP_SECURITY", ""), "smtp_user": os.environ.get("SMTP_USER", ""), "smtp_password": os.environ.get("SMTP_PASSWORD", ""), "smtp_starttls": 
os.environ.get("SMTP_STARTTLS", "false").lower() == "true", @@ -189,6 +192,7 @@ def _load_config(account: str | None = None) -> dict: cfg["imap_ssl"] = int(cfg["imap_port"]) == 993 and not cfg["imap_starttls"] cfg["smtp_host"] = row["smtp_host"] or cfg["smtp_host"] cfg["smtp_port"] = int(row["smtp_port"] or cfg["smtp_port"]) + cfg["smtp_security"] = row["smtp_security"] or cfg["smtp_security"] or ("starttls" if int(cfg["smtp_port"]) == 587 else "ssl") cfg["smtp_user"] = row["smtp_user"] or cfg["smtp_user"] cfg["smtp_password"] = _decrypt(row["smtp_password"]) if row["smtp_password"] else cfg["smtp_password"] cfg["from_address"] = row["from_address"] or row["imap_user"] or cfg["from_address"] @@ -333,14 +337,25 @@ def _decode_header(raw): """Decode MIME encoded header.""" if not raw: return "" - parts = email.header.decode_header(raw) - decoded = [] - for data, charset in parts: - if isinstance(data, bytes): - decoded.append(data.decode(charset or "utf-8", errors="replace")) - else: - decoded.append(data) - return " ".join(decoded) + try: + # make_header concatenates per RFC 2047: no spurious space between an + # encoded-word and adjacent plain text (plain runs keep their own + # whitespace), and whitespace between two adjacent encoded-words is + # dropped. The old " ".join produced "Re: Jose" style double spaces + # on every non-ASCII subject or sender. 
+ return str(email.header.make_header(email.header.decode_header(raw))) + except Exception: + # Malformed header or unknown charset: lossy per-part decode + decoded = [] + for data, charset in email.header.decode_header(raw): + if isinstance(data, bytes): + try: + decoded.append(data.decode(charset or "utf-8", errors="replace")) + except LookupError: + decoded.append(data.decode("utf-8", errors="replace")) + else: + decoded.append(data) + return "".join(decoded) def _extract_text(msg): @@ -413,6 +428,11 @@ def _list_emails(folder="INBOX", max_results=20, unresponded_only=False, status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)") elif unread_only: status, data = conn.uid("SEARCH", None, "(UNSEEN)") + elif unresponded_only: + # Was missing — unresponded_only=True (without unread_only) fell through + # to "ALL" and returned answered mail too, despite the documented + # "emails without replies" behaviour. + status, data = conn.uid("SEARCH", None, "(UNANSWERED)") else: # Include read too — IMAP search "ALL" returns the entire folder status, data = conn.uid("SEARCH", None, "ALL") @@ -739,17 +759,17 @@ def _smtp_connect(account=None, cfg=None): if not _smtp_ready(cfg): raise ValueError(f"Email account {cfg.get('account_name') or account or 'default'} has no SMTP configured") port = int(cfg.get("smtp_port") or 465) - # Account rows only store host/port, not the legacy env-level smtp_ssl - # toggle. Infer the conventional TLS mode from the port so MCP tools match - # the web send path: 465 = implicit SSL, 587 = STARTTLS. 
- if port == 587: + security = str(cfg.get("smtp_security") or "").strip().lower() + if security not in {"ssl", "starttls", "none"}: + security = "starttls" if port == 587 else "ssl" + if security == "starttls": conn = smtplib.SMTP( cfg["smtp_host"], port, timeout=EMAIL_SOCKET_TIMEOUT, ) conn.starttls() - elif cfg.get("smtp_ssl", True): + elif security == "ssl": conn = smtplib.SMTP_SSL( cfg["smtp_host"], port, @@ -761,8 +781,6 @@ def _smtp_connect(account=None, cfg=None): port, timeout=EMAIL_SOCKET_TIMEOUT, ) - if cfg["smtp_starttls"]: - conn.starttls() if cfg["smtp_user"] and cfg["smtp_password"]: conn.login(cfg["smtp_user"], cfg["smtp_password"]) return conn diff --git a/mcp_servers/memory_server.py b/mcp_servers/memory_server.py index c2812e1c0..1f226ad1d 100644 --- a/mcp_servers/memory_server.py +++ b/mcp_servers/memory_server.py @@ -161,10 +161,9 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: deleted_text = m.get("text", "") deleted_category = m.get("category", "") break - original_len = len(memories) - memories = [m for m in memories if not m.get("id", "").startswith(memory_id)] - if len(memories) == original_len: + if not full_id: return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")] + memories = [m for m in memories if m.get("id") != full_id] _memory_manager.save(memories) if _memory_vector and _memory_vector.healthy and full_id: try: diff --git a/mcp_servers/rag_server.py b/mcp_servers/rag_server.py index 2d50b4b4f..71aa1b60b 100644 --- a/mcp_servers/rag_server.py +++ b/mcp_servers/rag_server.py @@ -101,10 +101,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: return [TextContent(type="text", text=f"Error: {e}")] elif action == "add_directory": - directory = arguments.get("directory", "").strip() + _dir = arguments.get("directory") + directory = _dir.strip() if isinstance(_dir, str) else "" if not directory: return [TextContent(type="text", text="Error: add_directory needs a 
directory path")] - directory = os.path.expanduser(directory) + # Store an absolute path so indexed `source` metadata is absolute and + # remove_directory (which abspath-normalizes) can match it later (#1660). + directory = os.path.abspath(os.path.expanduser(directory)) if not os.path.isdir(directory): return [TextContent(type="text", text=f"Error: Directory not found: {directory}")] if not _rag_manager: @@ -112,14 +115,27 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: try: result = _rag_manager.index_personal_documents(directory) indexed = result.get("indexed_count", 0) if isinstance(result, dict) else 0 + # Record the directory so `list` and `remove_directory` can see it. + # Indexing was just done above, so pass index=False to avoid a second + # (ownerless) pass. Without this the directory was indexed but never + # tracked in indexed_directories, so it was invisible/unremovable. + if _personal_docs_manager and hasattr(_personal_docs_manager, "add_directory"): + try: + _personal_docs_manager.add_directory(directory, index=False) + except Exception: + pass return [TextContent(type="text", text=f"Directory '{directory}' added to RAG index ({indexed} chunks indexed)")] except Exception as e: return [TextContent(type="text", text=f"Error: Failed to index directory: {e}")] elif action == "remove_directory": - directory = arguments.get("directory", "").strip() + _dir = arguments.get("directory") + directory = _dir.strip() if isinstance(_dir, str) else "" if not directory: return [TextContent(type="text", text="Error: remove_directory needs a directory path")] + # Expand ~ to match add_directory, which indexes the expanded path. + # Without this, removing "~/docs" never matches the stored absolute path. 
+ directory = os.path.expanduser(directory) if not _personal_docs_manager: return [TextContent(type="text", text="Error: Personal docs manager not available")] try: diff --git a/odysseus-ui.service b/odysseus-ui.service index fea436398..835c8cc5a 100644 --- a/odysseus-ui.service +++ b/odysseus-ui.service @@ -9,7 +9,7 @@ Type=simple # CHANGE THESE to match your user and install path: User=YOURUSER WorkingDirectory=/home/YOURUSER/odysseus-ui -ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 8000 --host 0.0.0.0 +ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 7000 --host 0.0.0.0 Restart=always RestartSec=3 EnvironmentFile=-/home/YOURUSER/odysseus-ui/.env diff --git a/requirements-optional.txt b/requirements-optional.txt index 72d9f7e69..eeb57c151 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,6 +4,14 @@ # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic # memory, and tool selection are core paths, so they ship by default now. +# Local speech-to-text (microphone -> text) via faster-whisper, for the +# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no +# torch needed). Install if you want to dictate/transcribe with the mic +# without sending audio to an external endpoint. +# Optional extra: install `torch` too if you have a CUDA GPU and want +# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise. +faster-whisper + # DuckDuckGo as a search provider option. # Install if you want DDG in the search-provider dropdown. # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE. @@ -15,3 +23,14 @@ duckduckgo-search # network-served app — see ACKNOWLEDGMENTS.md. The MIT core (PDF *text* # extraction via pypdf) works without it; this only unlocks form-filling. PyMuPDF + +# Office / EPUB document text extraction (chat attachments + the personal-docs +# RAG index). 
markitdown (MIT, Microsoft) converts .docx/.xlsx/.pptx/.xls/.epub +# to Markdown — more token-efficient and model-legible than a raw dump. Optional +# and lazy-imported via src/markitdown_runtime.py; without it those formats fall +# back to a friendly "install to extract" banner and the core stays pure-MIT. +# Extras pull mammoth/lxml/python-pptx/pandas/openpyxl/xlrd; the base also pulls +# magika (onnxruntime), already a core dep via fastembed. We avoid the +# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per +# the dependency-age discussion in issue #485. +markitdown[docx,pptx,xlsx,xls]==0.1.5 diff --git a/routes/admin_wipe_routes.py b/routes/admin_wipe_routes.py index 668b02d92..01511c373 100644 --- a/routes/admin_wipe_routes.py +++ b/routes/admin_wipe_routes.py @@ -27,6 +27,7 @@ from core.database import ( Document, DocumentVersion, GalleryImage, + GalleryAlbum, CalendarEvent, CalendarCal, ) @@ -145,8 +146,9 @@ def setup_admin_wipe_routes(session_manager): return {"status": "deleted", "kind": kind, "count": count} if kind == "gallery": - count = db.query(GalleryImage).count() + count = db.query(GalleryImage).count() + db.query(GalleryAlbum).count() db.query(GalleryImage).delete() + db.query(GalleryAlbum).delete() db.commit() # Also drop the upload dir so disk doesn't keep orphans. _rmtree_quiet(os.path.join(DATA_DIR, "gallery")) diff --git a/routes/auth_routes.py b/routes/auth_routes.py index a81731930..5728d3ee3 100644 --- a/routes/auth_routes.py +++ b/routes/auth_routes.py @@ -67,6 +67,8 @@ class DeleteUserRequest(BaseModel): class RenameUserRequest(BaseModel): username: str +class SetOpenRegistrationRequest(BaseModel): + enabled: bool SESSION_COOKIE = "odysseus_session" @@ -295,6 +297,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: # owner-scoped DB rows before changing auth so the account keeps # access to its sessions, docs, email accounts, tasks, etc. 
try: + from sqlalchemy import func from core.database import Base, SessionLocal db = SessionLocal() try: @@ -304,7 +307,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: continue ( db.query(model) - .filter(model.owner == old_username) + .filter(func.lower(model.owner) == old_username) .update({"owner": new_username}, synchronize_session=False) ) db.commit() @@ -322,9 +325,15 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs prefs = _load_prefs() users = prefs.get("_users") if isinstance(prefs, dict) else None - if isinstance(users, dict) and old_username in users and new_username not in users: - users[new_username] = users.pop(old_username) - _save_prefs(prefs) + if isinstance(users, dict): + prefs_key = next( + (k for k in users if str(k).strip().lower() == old_username), + None, + ) + new_taken = any(str(k).strip().lower() == new_username for k in users) + if prefs_key is not None and not new_taken: + users[new_username] = users.pop(prefs_key) + _save_prefs(prefs) except Exception as e: logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e) @@ -333,15 +342,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(400, "Cannot rename user") return {"ok": True, "username": new_username, "renamed_self": old_username == user} - @router.post("/signup-toggle") + @router.post("/signup-toggle", deprecated=True) async def toggle_signup(request: Request): - """Toggle open registration on/off. Admin only.""" + """ + Toggle open registration on/off. Admin only. + + DEPRECATED: This endpoint uses toggle semantics which can lead to unsafe state changes. + Use PUT /open-signup instead. + + This endpoint is kept for backward compatibility and may be removed in future versions. 
+ """ user = _get_current_user(request) if not user or not auth_manager.is_admin(user): raise HTTPException(403, "Admin only") auth_manager.signup_enabled = not auth_manager.signup_enabled return {"ok": True, "signup_enabled": auth_manager.signup_enabled} + @router.put("/open-signup") + async def set_signup_enabled(body: SetOpenRegistrationRequest, request: Request): + """Set open signup enabled state. Admin only.""" + user = _get_current_user(request) + if not user or not auth_manager.is_admin(user): + raise HTTPException(403, "Admin only") + auth_manager.signup_enabled = body.enabled + return {"ok": True,"signup_enabled": auth_manager.signup_enabled} + @router.delete("/users") async def admin_delete_user(body: DeleteUserRequest, request: Request): user = _get_current_user(request) diff --git a/routes/backup_routes.py b/routes/backup_routes.py index b165fcce7..2b92a1529 100644 --- a/routes/backup_routes.py +++ b/routes/backup_routes.py @@ -77,7 +77,12 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo # ── Memories ── if "memories" in body and isinstance(body["memories"], list): existing = memory_manager.load_all() - existing_texts = {e.get("text", "").strip().lower() for e in existing} + # Dedup against THIS user's own memories only. Using every tenant's + # rows (load_all) meant a memory whose text matched any other + # user's was silently skipped, so the importing user lost their own + # data. The full store is still saved back below. 
+ existing_texts = {e.get("text", "").strip().lower() + for e in existing if e.get("owner") == user} added = 0 for mem in body["memories"]: if not isinstance(mem, dict) or not mem.get("text"): diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py index 3c767f233..1352e408b 100644 --- a/routes/calendar_routes.py +++ b/routes/calendar_routes.py @@ -12,10 +12,27 @@ from dateutil.rrule import rrulestr, rruleset from dateutil.rrule import DAILY, WEEKLY, MONTHLY, YEARLY from core.database import SessionLocal, CalendarCal, CalendarEvent -from src.auth_helpers import get_current_user +from src.auth_helpers import get_current_user, require_user logger = logging.getLogger(__name__) + +def _ics_naive_dtstart(dt): + """Naive value matching how import_ics STORES CalendarEvent.dtstart. + + Timed tz-aware events are stored as UTC with tzinfo stripped, all-day + dates as midnight datetimes, naive datetimes unchanged. The ICS dedup + must compute the same value or a re-import never matches the stored row. + """ + if isinstance(dt, datetime): + if dt.tzinfo is not None: + from datetime import timezone as _tz + return dt.astimezone(_tz.utc).replace(tzinfo=None) + return dt + if isinstance(dt, date): + return datetime(dt.year, dt.month, dt.day) + return dt + # Single-user fallback identity. Used only when: # 1. The app is configured for single-user (no auth middleware), AND # 2. The request didn't resolve to an authenticated user. @@ -28,16 +45,17 @@ _SINGLE_USER_MODE = _os.environ.get("ODYSSEUS_SINGLE_USER", "1") != "0" def _require_user(request: Request) -> str: - """Return the authenticated user. In multi-user mode an unauthenticated - request raises 401; in single-user mode it falls through to - FALLBACK_OWNER. 
Prevents the silent cross-user data write that would - happen if a request slipped past auth middleware in a real deployment.""" - u = get_current_user(request) - if u: - return u - if _SINGLE_USER_MODE: - return FALLBACK_OWNER - raise HTTPException(401, "Authentication required") + """Return the authenticated user. Uses require_user so AUTH_ENABLED=false + and single-user mode both work: require_user returns "" when auth is + disabled or unconfigured, and only raises 401 when auth is configured but + the caller is unauthenticated. Falls back to FALLBACK_OWNER for calendar + writes so data isn't stored under an empty owner in single-user mode.""" + user = require_user(request) + if user: + return user + # require_user returned "" — auth is off or unconfigured (single-user). + # Use FALLBACK_OWNER so calendar rows have a stable owner for filtering. + return FALLBACK_OWNER def _get_or_404_calendar(db, cal_id: str, owner: str) -> CalendarCal: @@ -64,6 +82,24 @@ def _get_or_404_event(db, uid: str, owner: str) -> CalendarEvent: return ev +def _ics_escape(text: str) -> str: + """Escape a value for an iCalendar TEXT field (RFC 5545 §3.3.11). + + Backslash, semicolon and comma are structural in TEXT values and must be + escaped, and newlines become a literal ``\\n``. Backslash is escaped first + so the escapes we add aren't re-escaped. + """ + return ( + (text or "") + .replace("\\", "\\\\") + .replace(";", "\\;") + .replace(",", "\\,") + .replace("\r\n", "\\n") + .replace("\n", "\\n") + .replace("\r", "\\n") + ) + + def _resolve_base_uid(uid: str) -> str: """Extract the base series UID from a compound occurrence UID. 
@@ -319,8 +355,8 @@ def _parse_dt(s: str) -> datetime: return None return h, mn - # today/tomorrow/yesterday [at] TIME - m = _re.match(r'^(today|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower) + # today/tonight/tomorrow/yesterday [at] TIME + m = _re.match(r'^(today|tonight|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower) if m: word, rest = m.group(1), m.group(2).strip() base = today @@ -434,8 +470,21 @@ def _expand_rrule( return [d] # Parse the rrule, applying it to the base dtstart. + rrule_str = ev.rrule + if ev.dtstart is not None and getattr(ev.dtstart, "tzinfo", None) is None: + # Events are stored with a naive (UTC) dtstart, but standard .ics + # exporters (Google/Apple/Outlook/Fastmail) write the bound as an + # absolute UTC value, e.g. UNTIL=20240105T090000Z. dateutil refuses to + # mix a tz-aware UNTIL with a naive DTSTART ("RRULE UNTIL values must be + # specified in UTC when DTSTART is timezone-aware"), so the except branch + # below would silently collapse the whole series to a single event. + # Drop the trailing Z so UNTIL matches the naive DTSTART. + import re as _re + rrule_str = _re.sub( + r"(UNTIL=\d{8}(?:T\d{6})?)Z", r"\1", rrule_str, flags=_re.IGNORECASE + ) try: - rule = rrulestr(ev.rrule, dtstart=ev.dtstart) + rule = rrulestr(rrule_str, dtstart=ev.dtstart) except Exception as ex: logger.warning( "Failed to parse rrule=%r for event %s: %s", ev.rrule, ev.uid, ex @@ -509,13 +558,20 @@ def setup_calendar_routes() -> APIRouter: owner = _require_user(request) from routes.prefs_routes import _load_for_user cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} + caldav_password = cfg.get("password") or "" + if caldav_password: + try: + from src.secret_storage import decrypt + caldav_password = decrypt(caldav_password) + except Exception: + pass # Surface url+username but never hand the password back to the # client — saved-state UI shouldn't leak the credential. 
return { "url": cfg.get("url", "") or "", "username": cfg.get("username", "") or "", "password": "", - "has_password": bool(cfg.get("password")), + "has_password": bool(caldav_password), "local": not bool(cfg.get("url")), } @@ -534,12 +590,20 @@ def setup_calendar_routes() -> APIRouter: prefs.pop("caldav", None) _save_for_user(owner, prefs) return {"ok": True, "cleared": True} - cfg["url"] = body.get("url", "").strip() + from src.caldav_sync import validate_caldav_url + try: + cfg["url"] = validate_caldav_url(body.get("url", "")) + except ValueError as e: + raise HTTPException(400, str(e)) cfg["username"] = (body.get("username") or "").strip() # Preserve the stored password when the client sends an empty # one (edit form re-submitted without re-typing the password). if body.get("password"): - cfg["password"] = body["password"] + from src.secret_storage import encrypt + cfg["password"] = encrypt(body["password"]) + elif cfg.get("password"): + from src.secret_storage import encrypt + cfg["password"] = encrypt(cfg["password"]) prefs["caldav"] = cfg _save_for_user(owner, prefs) return {"ok": True} @@ -566,9 +630,21 @@ def setup_calendar_routes() -> APIRouter: cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} url = url or (cfg.get("url") or "") user = user or (cfg.get("username") or "") - pw = pw or (cfg.get("password") or "") + if not pw: + pw = cfg.get("password") or "" + if pw: + try: + from src.secret_storage import decrypt + pw = decrypt(pw) + except Exception: + pass if not (url and user and pw): return {"ok": False, "error": "Missing URL, username, or password"} + from src.caldav_sync import validate_caldav_url + try: + url = validate_caldav_url(url) + except ValueError as e: + return {"ok": False, "error": str(e)} import httpx propfind_body = ( '\n' @@ -576,13 +652,25 @@ def setup_calendar_routes() -> APIRouter: '' ) try: - async with httpx.AsyncClient(timeout=8.0, follow_redirects=True) as cx: + async with httpx.AsyncClient(timeout=8.0, 
follow_redirects=False, trust_env=False) as cx: r = await cx.request( "PROPFIND", url, auth=(user, pw), headers={"Depth": "0", "Content-Type": "application/xml"}, content=propfind_body, ) + # If the server demands Digest (Baïkal default, SabreDAV-based + # servers, Radicale with htdigest), the Basic attempt above + # 401s. Retry once with httpx.DigestAuth so this test matches + # what the real sync does via caldav.DAVClient in + # src/caldav_sync.py (which negotiates the scheme). + if r.status_code == 401 and "digest" in r.headers.get("www-authenticate", "").lower(): + r = await cx.request( + "PROPFIND", url, + auth=httpx.DigestAuth(user, pw), + headers={"Depth": "0", "Content-Type": "application/xml"}, + content=propfind_body, + ) # 207 = Multi-Status — standard CalDAV success. 200 also # acceptable. Anything else (401/403/404/5xx) means trouble. if r.status_code in (200, 207): @@ -593,6 +681,8 @@ def setup_calendar_routes() -> APIRouter: return {"ok": False, "error": "Forbidden — user can't access that URL"} if r.status_code == 404: return {"ok": False, "error": "Not found — check the URL path"} + if 300 <= r.status_code < 400: + return {"ok": False, "error": "Redirects are not followed for CalDAV safety; use the final URL"} return {"ok": False, "error": f"HTTP {r.status_code}"} except httpx.ConnectError as e: return {"ok": False, "error": f"Connection refused: {e}"[:200]} @@ -739,6 +829,16 @@ def setup_calendar_routes() -> APIRouter: ) db.add(ev) db.commit() + if cal.source == "caldav": + # Push the new event to the remote so it appears on the user's + # other devices — the sync is otherwise pull-only (#800). 
+ from src.caldav_writeback import writeback_event + await writeback_event(owner, cal.source, cal.id, { + "uid": uid, "summary": data.summary, "description": data.description, + "location": data.location, "dtstart": dtstart, "dtend": dtend, + "all_day": data.all_day, "is_utc": _is_utc and not data.all_day, + "rrule": data.rrule or "", + }) return {"ok": True, "uid": uid} except HTTPException: raise @@ -785,6 +885,14 @@ def setup_calendar_routes() -> APIRouter: if data.color is not None: ev.color = data.color if data.color else None db.commit() + cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first() + if cal and cal.source == "caldav": + from src.caldav_writeback import writeback_event + await writeback_event(owner, cal.source, cal.id, { + "uid": ev.uid, "summary": ev.summary, "description": ev.description, + "location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend, + "all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "", + }) return {"ok": True} except HTTPException: raise @@ -805,8 +913,15 @@ def setup_calendar_routes() -> APIRouter: db = SessionLocal() try: ev = _get_or_404_event(db, base_uid, owner) + # Capture what the remote push needs BEFORE the row is gone. 
+ _cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first() + _is_caldav = bool(_cal and _cal.source == "caldav") + _cal_id, _ev_uid = ev.calendar_id, ev.uid db.delete(ev) db.commit() + if _is_caldav: + from src.caldav_writeback import writeback_event + await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True) return {"ok": True} except HTTPException: raise @@ -938,7 +1053,12 @@ def setup_calendar_routes() -> APIRouter: source_uid = str(comp.get("uid", "")) or None if source_uid: src_dtstart = dtstart.dt - naive_src = src_dtstart.replace(tzinfo=None) if hasattr(src_dtstart, 'tzinfo') and src_dtstart.tzinfo else src_dtstart + # Normalize to the SAME naive form import_ics stores, so a + # re-import of a tz-aware event matches the existing row. + # The old code stripped tzinfo WITHOUT converting to UTC + # (wall clock), while storage converts to UTC first, so + # every re-import of a TZID event created a duplicate. + naive_src = _ics_naive_dtstart(src_dtstart) existing = ( db.query(CalendarEvent) .filter( @@ -1032,23 +1152,23 @@ def setup_calendar_routes() -> APIRouter: "BEGIN:VCALENDAR", "VERSION:2.0", "PRODID:-//Odysseus//Calendar//EN", - f"X-WR-CALNAME:{cal.name}", + f"X-WR-CALNAME:{_ics_escape(cal.name)}", ] for ev in events: lines.append("BEGIN:VEVENT") lines.append(f"UID:{ev.uid}") - lines.append(f"SUMMARY:{ev.summary or ''}") + lines.append(f"SUMMARY:{_ics_escape(ev.summary or '')}") if ev.all_day: lines.append(f"DTSTART;VALUE=DATE:{ev.dtstart.strftime('%Y%m%d')}") lines.append(f"DTEND;VALUE=DATE:{ev.dtend.strftime('%Y%m%d')}") else: - lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}") - lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}") + _dt_suffix = "Z" if getattr(ev, "is_utc", False) else "" + lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}") + lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}") if ev.description: - desc = 
ev.description.replace(chr(10), '\\n') - lines.append(f"DESCRIPTION:{desc}") + lines.append(f"DESCRIPTION:{_ics_escape(ev.description)}") if ev.location: - lines.append(f"LOCATION:{ev.location}") + lines.append(f"LOCATION:{_ics_escape(ev.location)}") if ev.rrule: lines.append(f"RRULE:{ev.rrule}") lines.append("END:VEVENT") diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index 7e7a76432..cc2003677 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -3,6 +3,7 @@ import asyncio import json import logging +import os import re from dataclasses import dataclass, field from typing import Any, Optional @@ -11,6 +12,7 @@ from core.models import ChatMessage from core.database import SessionLocal from core.database import Session as DBSession, ModelEndpoint from src.llm_core import normalize_model_id +from src.endpoint_resolver import normalize_base from src.context_compactor import maybe_compact, trim_for_context from src.auth_helpers import get_current_user from src.prompt_security import untrusted_context_message @@ -119,7 +121,7 @@ def needs_auto_name(name: str) -> bool: if name.startswith("Chat:") or name == "Chat": return True # Default frontend name: "modelname HH:MM:SS AM/PM" - if re.match(r'^.+ \d{1,2}:\d{2}:\d{2}\s*(AM|PM)$', name): + if re.match(r"^.+ \d{1,2}:\d{2}:\d{2}(\s*(AM|PM))?$", name, re.IGNORECASE): return True return False @@ -146,9 +148,13 @@ async def auto_name_session(session_manager, sess): if not first_msg: return + owner = getattr(sess, "owner", None) t_url, t_model, t_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) + if not t_model: + logger.debug("[auto-name] No model provided, skipping") + return # max_tokens big enough that reasoning models (Minimax M2, # DeepSeek R1, QwQ, etc.) 
have headroom for @@ -306,7 +312,24 @@ def fire_message_event(request, webhook_manager, session_id: str, sess, message: fire_event("message_sent", user) -def resolve_session_auth(sess, session_id: str): +def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: + if not session_url or not endpoint_base: + return False + try: + from src.endpoint_resolver import build_chat_url, normalize_base + + sess_url = session_url.rstrip("/") + base = normalize_base(endpoint_base).rstrip("/") + return sess_url in { + base, + base + "/chat/completions", + build_chat_url(base).rstrip("/"), + } + except Exception: + return False + + +def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None): """Ensure session has auth headers — resolve from endpoint DB if missing.""" has_auth = sess.headers and isinstance(sess.headers, dict) and any( k.lower() in ('authorization', 'x-api-key') for k in sess.headers @@ -315,25 +338,96 @@ def resolve_session_auth(sess, session_id: str): return try: - from src.endpoint_resolver import build_headers + from src.endpoint_resolver import build_headers, normalize_base db = SessionLocal() try: - domain = sess.endpoint_url.split("//")[1].split("/")[0] if "//" in sess.endpoint_url else "" - if domain: - ep = db.query(ModelEndpoint).filter(ModelEndpoint.base_url.contains(domain)).first() - if ep and ep.api_key: - sess.headers = build_headers(ep.api_key, ep.base_url) - db.query(DBSession).filter(DBSession.id == session_id).update( - {"headers": json.dumps(sess.headers)} - ) - db.commit() - logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}") + target_url = getattr(sess, "endpoint_url", "") or "" + if not target_url: + return + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + # Missing headers usually means "recover from the saved endpoint". 
+ # Scope that lookup to the session owner, otherwise two users + # with similar endpoint URLs can borrow each other's API key. + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + for ep in q.all(): + if not _session_url_matches_endpoint(target_url, ep.base_url or ""): + continue + if not ep.api_key: + return + base = normalize_base(ep.base_url or "") + sess.headers = build_headers(ep.api_key, base) + update_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + update_q = update_q.filter(DBSession.owner == owner) + update_q.update({"headers": sess.headers}) + db.commit() + logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}") + return finally: db.close() except Exception as e: logger.warning(f"Failed to resolve session headers: {e}") +def _match_cached_model_id(requested: str, models) -> Optional[str]: + if not requested or not models: + return None + model_ids = [str(m) for m in models if m] + if requested in model_ids: + return requested + + req_base = os.path.basename(requested.rstrip("/")) + for model_id in model_ids: + if os.path.basename(model_id.rstrip("/")) == req_base: + return model_id + return None + + +def _normalize_model_id_from_cache(sess) -> Optional[str]: + """Use stored endpoint model IDs before falling back to a live /models probe.""" + endpoint_url = getattr(sess, "endpoint_url", "") or "" + requested = getattr(sess, "model", "") or "" + if not endpoint_url or not requested: + return None + + try: + session_base = normalize_base(endpoint_url) + except Exception: + session_base = endpoint_url.rstrip("/") + if not session_base: + return None + + db = SessionLocal() + try: + endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all() + for ep in endpoints: + try: + if normalize_base(getattr(ep, "base_url", "") or "") != session_base: + continue + except Exception: + continue + + raw_models = getattr(ep, "cached_models", 
None) + if not raw_models: + continue + try: + models = json.loads(raw_models) if isinstance(raw_models, str) else raw_models + except Exception: + continue + + matched = _match_cached_model_id(requested, models) + if matched: + return matched + except Exception as e: + logger.debug("Cached model normalization skipped: %s", e) + finally: + db.close() + + return None + + async def build_chat_context( sess, request, @@ -434,8 +528,9 @@ async def build_chat_context( for transcript in preprocessed.youtube_transcripts: preface.append(untrusted_context_message("youtube transcript", transcript)) - # Normalize model ID - norm = normalize_model_id(sess.endpoint_url, sess.model) + # Normalize model ID. Prefer cached endpoint models so group chat does not + # re-hit slow local /models endpoints on every participant turn. + norm = _normalize_model_id_from_cache(sess) or normalize_model_id(sess.endpoint_url, sess.model) if norm: sess.model = norm @@ -743,7 +838,7 @@ def run_post_response_tasks( from services.memory.memory_extractor import extract_and_store from src.task_endpoint import resolve_task_endpoint t_url, t_model, t_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) asyncio.create_task(extract_and_store( sess, memory_manager, memory_vector, @@ -780,7 +875,7 @@ def run_post_response_tasks( from services.memory.skill_extractor import maybe_extract_skill from src.task_endpoint import resolve_task_endpoint s_url, s_model, s_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model) asyncio.create_task(maybe_extract_skill( diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 3cdcb8586..f54c26529 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -23,10 +23,12 @@ from src.prompt_security import 
untrusted_context_message from core.exceptions import SessionNotFoundError from src.auth_helpers import get_current_user from routes.session_routes import _verify_session_owner +from routes.document_helpers import _owner_session_filter from core.database import SessionLocal, get_session_mode, set_session_mode from core.database import Session as DBSession, ChatMessage as DBChatMessage from core.database import Document as DBDocument, ModelEndpoint from routes.research_routes import _resolve_research_endpoint +from routes.model_routes import _visible_models from routes.chat_helpers import ( resolve_session_auth, build_chat_context, @@ -41,6 +43,7 @@ logger = logging.getLogger(__name__) # Track active streams for partial-save safety net _active_streams: Dict[str, dict] = {} +_IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image") def _stream_set(session_id: str, **fields) -> None: @@ -69,13 +72,17 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: return sess in variants or sess.startswith(base + "/") -def _clear_orphaned_session_endpoint(sess) -> bool: +def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool: """Clear a session model if its endpoint was deleted from ModelEndpoint.""" if not getattr(sess, "endpoint_url", ""): return False db = SessionLocal() try: - endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all() + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() for ep in endpoints: if _session_url_matches_endpoint(sess.endpoint_url or "", ep.base_url or ""): return False @@ -96,6 +103,132 @@ def _clear_orphaned_session_endpoint(sess) -> bool: db.close() +def _endpoint_cache_contains_model(endpoint, model: str) -> bool: + """Return True when a populated endpoint model cache includes ``model``. 
+ + Empty/malformed caches are treated as unknown rather than a negative match + so older image endpoints without cached models still work. + """ + raw = getattr(endpoint, "cached_models", None) + if not raw: + return True + try: + models = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return True + if not isinstance(models, list) or not models: + return True + wanted = (model or "").strip() + return wanted in {str(item).strip() for item in models} + + +def _is_image_generation_session(sess, owner: str | None = None) -> bool: + """Whether this chat session should bypass text chat and generate images. + + Model-name prefixes are explicit image models. Endpoint type is only used + when the current session endpoint actually matches that image endpoint, and + when a populated endpoint model cache includes the selected model. This + prevents an image endpoint on the same host from misrouting ordinary text + models into the image-generation path. + """ + model = (getattr(sess, "model", "") or "").strip() + if any(model.lower().startswith(prefix) for prefix in _IMAGE_MODEL_PREFIXES): + return True + + endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip() + if not endpoint_url: + return False + + db = SessionLocal() + try: + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() + for endpoint in endpoints: + if (getattr(endpoint, "model_type", None) or "llm") != "image": + continue + if not _session_url_matches_endpoint(endpoint_url, getattr(endpoint, "base_url", "") or ""): + continue + if _endpoint_cache_contains_model(endpoint, model): + return True + except Exception: + return False + finally: + db.close() + return False + + +def _recover_empty_session_model(sess, session_id: str, owner: str | None = None) -> bool: + """Re-populate sess.model from the matching endpoint's cached models. 
+ + Covers the window between endpoint setup and the first chat send: the + picker showed a model in the dropdown but the session record never got + written (Issue #587 — UI uses the cached endpoint list, not s.model). + Without this, we'd POST the upstream with model="" and get a generic + 401/503 instead of using the model the user already picked. + + Returns True iff sess.model was repaired. + """ + if getattr(sess, "model", None): + return False + db = SessionLocal() + try: + # Prefer the endpoint whose base URL matches the session — we know the + # user already pointed this session at that endpoint, so its first + # cached model is the most defensible default. + ep = None + if getattr(sess, "endpoint_url", ""): + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() + for cand in endpoints: + if _session_url_matches_endpoint(sess.endpoint_url or "", cand.base_url or ""): + ep = cand + break + if not ep: + return False + try: + cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or []) + except Exception: + cached = [] + if not cached: + return False + try: + visible = _visible_models(cached, getattr(ep, "hidden_models", None)) + except Exception: + visible = cached + if not visible: + return False + model = visible[0] + if not isinstance(model, str) or not model.strip(): + return False + model = model.strip() + # Persist so the next request, websocket reconnect, or page reload + # picks up the same model (we'd otherwise re-pick on every send + # and silently switch on the user if the cached order shifts). 
+ db_session = db.query(DBSession).filter(DBSession.id == session_id).first() + if db_session: + db_session.model = model + db_session.updated_at = datetime.utcnow() + db.commit() + sess.model = model + logger.info( + "Recovered empty session model for %s — picked %r from endpoint %s", + session_id, model, ep.id, + ) + return True + except Exception as e: + db.rollback() + logger.warning("Failed to recover empty session model for %s: %s", session_id, e) + return False + finally: + db.close() + + def setup_chat_routes( session_manager, chat_handler, @@ -130,9 +263,20 @@ def setup_chat_routes( sess = session_manager.get_session(session) except KeyError: raise HTTPException(404, f"Session '{session}' not found") - if _clear_orphaned_session_endpoint(sess): + owner = get_current_user(request) + if _clear_orphaned_session_endpoint(sess, owner=owner): raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.") + # Empty model + live endpoint = setup race (Issue #587). Repair from + # the endpoint's cached model list before privilege checks, which + # otherwise see "" and behave inconsistently with the allowlist. + _recover_empty_session_model(sess, session, owner=owner) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. Open the model picker and choose one before sending.", + ) + # Same allowed_models + daily-cap gate as chat_stream (mirror so the # non-streaming path can't be used to bypass). _enforce_chat_privileges(request, sess) @@ -270,8 +414,21 @@ def setup_chat_routes( # but BEFORE loading. Prevents cross-user session hijack. _verify_session_owner(request, session) sess = session_manager.get_session(session) - if _clear_orphaned_session_endpoint(sess): + owner = get_current_user(request) + if _clear_orphaned_session_endpoint(sess, owner=owner): raise HTTPException(400, "Selected model endpoint was removed. 
Pick another model in Settings.") + # Issue #587: picker shows a model from the endpoint cache but + # s.model never made it onto the DB row (first-send race after + # endpoint setup, or a previous endpoint delete/recreate). Pull + # the first cached model off the matching endpoint so the + # upstream isn't called with model="" (which surfaces as a + # generic 401/503). + _recover_empty_session_model(sess, session, owner=owner) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. Open the model picker and choose one before sending.", + ) except SessionNotFoundError as e: raise HTTPException(404, str(e)) except (ValueError, ValidationError): @@ -288,7 +445,7 @@ def setup_chat_routes( _enforce_chat_privileges(request, sess) # Ensure session has auth headers - resolve_session_auth(sess, session) + resolve_session_auth(sess, session, owner=get_current_user(request)) # Check for research_pending BEFORE mode persist overwrites it do_research = str(use_research).lower() == "true" @@ -343,18 +500,22 @@ def setup_chat_routes( try: if active_doc_id: logger.info(f"[doc-inject] active_doc_id from frontend: {active_doc_id}") - active_doc = _doc_db.query(DBDocument).filter( - DBDocument.id == active_doc_id, - ).first() + # Scope to the caller's documents. The session and in-memory + # fallbacks below are already owner/session-bound; this + # explicit-id path looked up by id alone, so a user could + # inject another user's document by passing its id. 
+ _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id) + active_doc = _owner_session_filter(_doc_q, ctx.user).first() if active_doc: logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") else: logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}") if not active_doc: - active_doc = _doc_db.query(DBDocument).filter( + _session_doc_q = _doc_db.query(DBDocument).filter( DBDocument.session_id == session, DBDocument.is_active == True - ).order_by(DBDocument.updated_at.desc()).first() + ) + active_doc = _owner_session_filter(_session_doc_q, ctx.user).order_by(DBDocument.updated_at.desc()).first() if active_doc: logger.info(f"[doc-inject] found by session fallback: title={active_doc.title!r}") # Last resort: the document the agent itself just created/edited @@ -368,7 +529,8 @@ def setup_chat_routes( from src.tool_implementations import get_active_document _mem_id = get_active_document() if _mem_id: - cand = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id).first() + _mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id) + cand = _owner_session_filter(_mem_q, ctx.user).first() if cand and (not cand.session_id or cand.session_id == session): active_doc = cand logger.info(f"[doc-inject] found by in-memory active id: title={active_doc.title!r} (session_id={cand.session_id!r})") @@ -563,6 +725,7 @@ def setup_chat_routes( prior_findings=_prior_findings, prior_urls=_prior_urls, on_complete=_on_research_done, + owner=_user, ) _heartbeat_counter = 0 @@ -619,7 +782,7 @@ def setup_chat_routes( # output. Resolved once per request. 
try: from src.endpoint_resolver import resolve_chat_fallback_candidates - _fallback_candidates = resolve_chat_fallback_candidates() + _fallback_candidates = resolve_chat_fallback_candidates(owner=_user) except Exception: _fallback_candidates = [] @@ -632,28 +795,7 @@ def setup_chat_routes( _model_info["character_name"] = ctx.preset.character_name yield f'data: {json.dumps(_model_info)}\n\n' - # Detect image models and route directly to image generation - _IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image") - _is_image_model = any(sess.model.lower().startswith(p) for p in _IMAGE_MODEL_PREFIXES) - - # Also check if the endpoint is registered as an image-type endpoint - if not _is_image_model: - try: - from src.endpoint_resolver import normalize_base as _nb - _ep_base = _nb(sess.endpoint_url) - _db = SessionLocal() - try: - _is_image_model = _db.query(ModelEndpoint).filter( - ModelEndpoint.model_type == "image", - ModelEndpoint.is_enabled == True, - ModelEndpoint.base_url.contains(_ep_base.split("://")[-1].split("/")[0]), - ).first() is not None - finally: - _db.close() - except Exception: - pass - - if _is_image_model: + if _is_image_generation_session(sess, owner=_user): from src.settings import get_setting if not get_setting("image_gen_enabled", True): yield f'data: {json.dumps({"delta": "Image generation is disabled by the administrator."})}\n\n' @@ -664,7 +806,7 @@ def setup_chat_routes( _user_msg = message or "" yield f'data: {json.dumps({"type": "tool_start", "tool": "generate_image", "command": _user_msg[:100]})}\n\n' yield ": heartbeat\n\n" - _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session) + _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session, owner=_user) _img_output = _img_result.get("results", _img_result.get("error", "")) _img_tool_data = {"type": "tool_output", "tool": "generate_image", "command": _user_msg[:100], "output": _img_output, "exit_code": 0 if "error" not in _img_result else 1} 
for _k in ("image_url", "image_id", "image_prompt", "image_model", "image_size", "image_quality"): @@ -688,6 +830,7 @@ def setup_chat_routes( return elif chat_mode == "chat": _chat_start = time.time() + _answered_by = None # set if the selected model failed and a fallback answered # ── Chat mode: call stream_llm directly, NO tools, NO document access ── try: _chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates @@ -708,16 +851,35 @@ def setup_chat_routes( try: data = json.loads(chunk[6:]) if "delta" in data: - full_response += data["delta"] - _stream_set(session, partial=full_response) + # Reasoning tokens arrive flagged thinking:true. + # Forward them so the client can show a thinking + # indicator, but don't fold them into the saved + # reply (mirrors the rewrite path below). + if not data.get("thinking"): + full_response += data["delta"] + _stream_set(session, partial=full_response) + yield chunk + elif data.get("type") == "fallback": + # Selected model failed; a fallback answered. + # Forward the notice and remember the real model. + _answered_by = data.get("answered_by") or _answered_by yield chunk elif data.get("type") == "usage": last_metrics = data.get("data", {}) - last_metrics["model"] = sess.model + last_metrics["model"] = _answered_by or sess.model if ctx.context_length and last_metrics.get("input_tokens"): pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0) last_metrics["context_percent"] = pct last_metrics["context_length"] = ctx.context_length + # The frontend reads `tokens_per_second`; the raw usage event + # carries the backend's true gen speed as `gen_tps` (llama.cpp + # timings). Map it through so this direct-chat path shows real + # t/s instead of "n/a" → falling back to a bare token count. 
+ if last_metrics.get("gen_tps") and not last_metrics.get("tokens_per_second"): + last_metrics["tokens_per_second"] = last_metrics["gen_tps"] + last_metrics["tps_source"] = "backend" + # Wall-clock response time for the stats popup ("Time"). + last_metrics.setdefault("response_time", round(time.time() - _chat_start, 2)) yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' except json.JSONDecodeError: yield chunk @@ -781,6 +943,7 @@ def setup_chat_routes( # ── Agent mode: full agent loop with tools ── _agent_rounds = 0 _agent_tool_calls = 0 + _answered_by = None # set if the selected model failed and a fallback answered try: from src.settings import get_setting _tool_budget = int(get_setting("agent_max_tool_calls", 0)) @@ -805,8 +968,12 @@ def setup_chat_routes( try: data = json.loads(chunk[6:]) if "delta" in data: - full_response += data["delta"] - _stream_set(session, partial=full_response) + # Reasoning tokens arrive flagged thinking:true. + # Forward them for the live indicator, but keep + # them out of the saved reply (same as chat mode). + if not data.get("thinking"): + full_response += data["delta"] + _stream_set(session, partial=full_response) yield chunk elif data.get("type") == "web_sources": web_sources = data.get("data", []) @@ -821,9 +988,16 @@ def setup_chat_routes( elif data.get("type") == "tool_start": _agent_tool_calls += 1 yield chunk + elif data.get("type") == "fallback": + # Selected model failed; a fallback answered. + # Forward the notice and remember the real + # model so metrics reflect it, not the masked + # selected model. 
+ _answered_by = data.get("answered_by") or _answered_by + yield chunk elif data.get("type") == "metrics": last_metrics = data.get("data", {}) - last_metrics["model"] = sess.model + last_metrics["model"] = _answered_by or sess.model yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' except json.JSONDecodeError: yield chunk @@ -920,11 +1094,15 @@ def setup_chat_routes( _verify_session_owner(request, session_id) # A detached run can still be going even if _active_streams was popped; # report it as active so the client knows to reconnect via /resume. - if session_id not in _active_streams: + # Read once via .get() to avoid a KeyError race between the membership + # check and the indexed read if a sibling stream's finally pops the + # entry in between (same pattern _stream_set already uses). + rec = _active_streams.get(session_id) + if rec is None: if agent_runs.is_active(session_id): return {"status": "streaming", "detached": True} raise HTTPException(404, "No active stream for this session") - return _active_streams[session_id] + return rec # ------------------------------------------------------------------ # # POST /api/inject_context @@ -1088,7 +1266,7 @@ def setup_chat_routes( db_msg = ( db.query(DBChatMessage) .filter(DBChatMessage.session_id == session_id, DBChatMessage.role == 'assistant') - .order_by(DBChatMessage.created_at.desc()) + .order_by(DBChatMessage.timestamp.desc()) .first() ) if db_msg: diff --git a/routes/contacts_routes.py b/routes/contacts_routes.py index 8db546308..409184fa1 100644 --- a/routes/contacts_routes.py +++ b/routes/contacts_routes.py @@ -130,21 +130,28 @@ def _parse_vcards(text: str) -> List[Dict]: contact = {"name": "", "emails": [], "phones": [], "uid": ""} for line in block.split("\n"): line = line.strip() - if line.startswith("FN:") or line.startswith("FN;"): - contact["name"] = _vunesc(line.split(":", 1)[1]) if ":" in line else "" - elif line.startswith("EMAIL"): + # Strip an optional RFC 6350 group 
prefix (e.g. "item1.EMAIL;...") + # that Apple Contacts / iCloud / many CardDAV servers emit by + # default — without this the property-name checks below miss those + # lines and silently drop the email / phone. The group token only + # precedes the property name, so it is safe to strip for matching + # and value extraction, and a no-op for non-grouped lines. + name_part = re.sub(r"^[A-Za-z0-9-]+\.", "", line, count=1) + if name_part.startswith("FN:") or name_part.startswith("FN;"): + contact["name"] = _vunesc(name_part.split(":", 1)[1]) if ":" in name_part else "" + elif name_part.startswith("EMAIL"): # Handle EMAIL:foo@bar OR EMAIL;TYPE=...:foo@bar OR EMAIL;PREF=1:foo@bar - if ":" in line: - email_addr = _vunesc(line.split(":", 1)[1]) + if ":" in name_part: + email_addr = _vunesc(name_part.split(":", 1)[1]) if email_addr and email_addr not in contact["emails"]: contact["emails"].append(email_addr) - elif line.startswith("TEL"): - if ":" in line: - phone = _vunesc(line.split(":", 1)[1]) + elif name_part.startswith("TEL"): + if ":" in name_part: + phone = _vunesc(name_part.split(":", 1)[1]) if phone and phone not in contact["phones"]: contact["phones"].append(phone) - elif line.startswith("UID:"): - contact["uid"] = _vunesc(line[4:]) + elif name_part.startswith("UID:"): + contact["uid"] = _vunesc(name_part[4:]) if contact["name"] or contact["emails"]: contacts.append(contact) return contacts @@ -676,8 +683,8 @@ def setup_contacts_routes(): @router.post("/add") async def add_contact(data: dict, _admin: str = Depends(require_admin)): """Add a new contact.""" - name = data.get("name", "").strip() - email = data.get("email", "").strip() + name = (data.get("name") or "").strip() + email = (data.get("email") or "").strip() if not email: return {"success": False, "error": "Email required"} # Check if already exists diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index c311b24e6..c60940a91 100644 --- a/routes/cookbook_helpers.py +++ 
b/routes/cookbook_helpers.py @@ -148,6 +148,108 @@ def _local_tooling_path_export(executable: str) -> str: return f'export PATH="{esc}:$PATH"' +def _pip_install_no_cache(cmd: str) -> str: + """Add ``--no-cache-dir`` to a pip install command. + + Cookbook dependency installs (vLLM, llama-cpp-python, …) build large wheels; + pip's default cache lives under ``$HOME/.cache/pip`` and these builds can fill + a small home filesystem with ``[Errno 28] No space left on device`` mid-build + (issue #1219), leaving the dependency "installed" but unusable (#1459). + Disabling the cache for these one-off installs keeps them off the home disk + (the maintainer's suggested ``PIP_CACHE_DIR=`` workaround, made the default). + Idempotent; leaves non-pip-install commands untouched.""" + if not cmd or "pip install" not in cmd or "--no-cache-dir" in cmd: + return cmd + return cmd.replace("pip install", "pip install --no-cache-dir", 1) + + +def _pip_install_attempt(pip_cmd: str) -> str: + """Wrap a single pip install command so its exit status survives the + fallback chain and its stderr is visible in the tmux log on failure. + + Without this wrapper, `pip … 2>&1 | tail -5` returns ``tail``'s exit + code (0), masking pip's real failure and preventing the next fallback + from running. The generated snippet captures all output to a temp + file, prints the last 5 lines on failure (so the Cookbook log panel + shows useful diagnostics), cleans up, and exits with pip's original + status. + """ + return ( + "bash -c '" + f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; ' + 'tail -5 "$_out"; rm -f "$_out"; exit $_rc' + "'" + ) + + +def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str: + """Build a bash pip install fallback chain that surfaces errors. + + Try the active interpreter/environment first. ``--user`` is invalid + inside many venvs, so only attempt the ``--user`` fallback when NOT + inside a venv. 
+ + Each attempt is wrapped via :func:`_pip_install_attempt` so pip's real + exit code is preserved (no ``| tail`` masking) and the last 5 lines of + pip output appear in the Cookbook log on failure. + """ + upgrade_flag = " -U" if upgrade else "" + # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]`` + # contains brackets that bash would treat as a glob, so it must be quoted + # before being embedded in the install command. Plain names (e.g. + # ``huggingface_hub``) are returned unchanged by ``shlex.quote``. + pkg = shlex.quote(package) + base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}") + user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}") + # Derive the python executable for the venv detection check. + # Must use the same interpreter that pip belongs to; hardcoding + # python3 breaks when pip lives in a venv that only has "python". + if " -m pip" in python_cmd: + python_exe = python_cmd.replace(" -m pip", "") + elif python_cmd.strip() == "pip": + python_exe = "python" + elif python_cmd.strip() == "pip3": + python_exe = "python3" + else: + python_exe = "python3" + venv_check = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"' + # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv → `&&` tries + # --user. When IN a venv `! venv_check` fails → `&&` skips --user and the + # group exits non-zero, propagating the base-install failure instead of + # masking it as success (the `|| { venv_check || … }` shape from #903 + # swallowed the exit code because venv_check's exit-0 became the group's + # result). + return f"{base} || {{ ! {venv_check} && {user}; }}" + + +def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str: + """Drop pip user-install flags that are invalid for local venv installs. 
+ + Cookbook dependency installs run through the model-serve task path so users + can watch progress in the same log UI. For local POSIX runs, that task + prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is + running from a venv, `python3` resolves to the venv Python and pip rejects + `--user` with "User site-packages are not visible in this virtualenv". + + Keep remote and non-venv installs unchanged: remotes may intentionally use + system Python, and Docker/non-venv installs still need user-site fallback. + """ + if not local or not in_venv: + return cmd + if "pip install" not in (cmd or ""): + return cmd + try: + parts = shlex.split(cmd) + except ValueError: + return cmd + stripped = [ + part + for part in parts + if part not in {"--user", "--break-system-packages"} + ] + return shlex.join(stripped) + + def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: """Build the standalone Python scanner used by /api/model/cached.""" lines = [ @@ -166,6 +268,38 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " for root, dirs, fns in os.walk(top, followlinks=False):", " dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]", " yield root, dirs, fns", + "def gguf_role(name):", + " n = name.lower()", + " if n.startswith('mmproj') or 'mmproj' in n: return 'projector'", + " return 'model'", + "def gguf_quant(name):", + " m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)", + " return m.group(0).upper() if m else ''", + "def collect_ggufs(base):", + " files = []", + " split_groups = {}", + " if not os.path.isdir(base) or not safe_path(base): return files", + " for root, dirs, fns in safe_walk(base):", + " for fn in sorted(fns):", + " if not fn.lower().endswith('.gguf'): continue", + " fp = os.path.join(root, fn)", + " try: size = os.path.getsize(fp)", + " except Exception: size = 0", + " try: rel = 
os.path.relpath(fp, base).replace(os.sep, '/')", + " except Exception: rel = fn", + " sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)", + " if sm:", + " prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)", + " key = (root, prefix, total_s)", + " g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})", + " g['size_bytes'] += size", + " if int(part_s) == 1:", + " g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})", + " continue", + " files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})", + " files.extend(split_groups.values())", + " files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))", + " return files", "def scan_hf(cache):", " if not os.path.isdir(cache): return", " for d in sorted(os.listdir(cache)):", @@ -180,16 +314,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " if f.is_file(): nf += 1; sz += f.stat().st_size", " if f.name.endswith('.incomplete'): ic = True", " snap = os.path.join(cache, d, 'snapshots')", - " is_diffusion = False; is_gguf = False", + " is_diffusion = False; gguf_files = []", " if os.path.isdir(snap):", " for sd in os.listdir(snap):", " sf = os.path.join(snap, sd)", " if not os.path.isdir(sf): continue", " if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True", - " try:", - " if any(x.endswith('.gguf') for x in os.listdir(sf)): is_gguf = True", - " except Exception: pass", - " models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':is_gguf})", + " for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)", + " 
models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})", "def scan_dir(p):", " if not os.path.isdir(p) or not safe_path(p): return", " for d in sorted(os.listdir(p)):", @@ -198,13 +330,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " fp = os.path.join(p, d)", " if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue", " if d in seen: continue", - " is_model = False; is_gguf = False", + " is_model = False; gguf_files = []", " for root, dirs, fns in safe_walk(fp):", " for fn in fns:", - " if fn.endswith('.gguf'): is_gguf = True; is_model = True", + " if fn.lower().endswith('.gguf'): is_model = True", " elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True", " if is_model: break", " if not is_model: continue", + " gguf_files = collect_ggufs(fp)", " seen.add(d)", " sz, nf = 0, 0", " for dp, _, fns in safe_walk(fp):", @@ -212,7 +345,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))", " except Exception: pass", " is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))", - " models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})", + " models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})", "def parse_size(num, unit):", " try: n = float(num)", " except Exception: return 0", @@ -293,6 +426,38 @@ _SERVE_CMD_ALLOWLIST = { _GGUF_PRELUDE_RE = re.compile( r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*' ) +_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)") +_OLLAMA_BIND_RE = 
re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$") +_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$") + + +def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]: + """Return the Ollama bind host/port requested by a serve command. + + Plain local `ollama serve` defaults to loopback. Remote callers can pass a + wider default host so the resulting API is reachable by Odysseus. + """ + if not cmd: + return default_host, "11434" + match = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd) + if not match: + return default_host, "11434" + value = match.group(1).strip("'\"") + bind_match = _OLLAMA_BIND_RE.match(value) + if not bind_match: + return "127.0.0.1", "11434" + bracketed_host = bind_match.group(1) + host = bracketed_host or bind_match.group(3) or "127.0.0.1" + port = bind_match.group(2) or bind_match.group(4) or "11434" + if not _OLLAMA_BIND_HOST_RE.match(host): + return "127.0.0.1", "11434" + try: + port_num = int(port, 10) + except ValueError: + return "127.0.0.1", "11434" + if port_num < 1 or port_num > 65535: + return "127.0.0.1", "11434" + return f"[{host}]" if bracketed_host else host, port def _check_serve_binary(seg: str) -> None: @@ -370,6 +535,83 @@ def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: b runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"') else: runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="') + runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"') + + +def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None: + """Append Linux llama.cpp build lines that prefer ROCm/HIP when available. + + Cookbook already detects AMD GPUs elsewhere, but the llama.cpp bootstrap used + to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and + fail with "CUDA Toolkit not found" instead of building with HIP. 
+ """ + # Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH + # so cmake's CUDA configure can find it. We keep this after the ROCm/HIP + # check — a machine with both stacks should honor the native HIP toolchain on + # AMD hosts instead of accidentally preferring a stray nvcc wheel. + runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do') + runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break') + runner_lines.append(' done') + # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA + # or HIP attempt) doesn't cause the next configure to reuse stale settings. + runner_lines.append(' cd ~/llama.cpp && rm -rf build') + runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then') + runner_lines.append(' if command -v hipconfig &>/dev/null; then') + runner_lines.append(' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"') + runner_lines.append(' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"') + runner_lines.append(' fi') + runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' elif command -v nvcc &>/dev/null; then') + # nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete + # tooling can expose nvcc without shipping libcudart, causing cmake to fail + # mid-build with "CUDA runtime library not found". Check cudart explicitly + # via a small helper so the guard stays readable. 
+ runner_lines.append(' _odysseus_has_cudart() {') + runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0') + runner_lines.append(' local _cuh="${CUDA_HOME:-/usr/local/cuda}"') + runner_lines.append(' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' return 1') + runner_lines.append(' }') + runner_lines.append(' if _odysseus_has_cudart; then') + runner_lines.append(' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' else') + runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."') + runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') + runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. 
cuda-runtime package) and visible via ldconfig or CUDA_HOME."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' fi') + runner_lines.append(' else') + runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."') + runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') + runner_lines.append(' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' fi') + + +def _llama_cpp_rebuild_cmd() -> str: + """Shell command that clears the Cookbook-managed llama.cpp build. + + Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build`` + directory so the next llama.cpp serve recompiles from source, picking up a + CUDA or HIP toolchain if one is now available. The serve bootstrap only + builds when ``llama-server`` is missing from PATH, so without this an + existing CPU-only build is reused forever. It deliberately installs and + downloads nothing; the rebuild itself happens on the next serve. + """ + return ( + 'mkdir -p "$HOME/bin" && ' + 'rm -f "$HOME/bin/llama-server" && ' + 'rm -rf "$HOME/llama.cpp/build" && ' + 'echo "[odysseus] Cleared the cached llama.cpp build. 
' + 'Re-launch the serve task to rebuild llama-server from source ' + '(CUDA or HIP will be used if a toolchain is now available)."' + ) class ModelDownloadRequest(BaseModel): diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index d794aee05..7f2157b1a 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -37,7 +37,8 @@ from routes.cookbook_helpers import ( _validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path, _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase, _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines, - _append_serve_exit_code_lines, _cached_model_scan_script, + _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script, + _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, _venv_safe_local_pip_install_cmd, ModelDownloadRequest, ServeRequest, ) @@ -148,6 +149,15 @@ def setup_cookbook_routes() -> APIRouter: "No GPUs are visible to the serve process.", [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}], ), + ( + r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available", + "vLLM could not find a supported GPU (CUDA or ROCm). 
" + "This machine may have integrated or unsupported graphics only.", + [ + {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + ], + ), ( r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed", "vLLM is not installed or not in PATH on this server.", @@ -163,6 +173,11 @@ def setup_cookbook_routes() -> APIRouter: "llama.cpp / llama-cpp-python dependencies are missing.", [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}], ), + ( + r"No GGUF found on this host|no \.gguf file|No GGUF file found", + "No GGUF file found for this model on this host. The llama.cpp backend needs a .gguf file.", + [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}], + ), ( r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers", "Diffusion serving requires PyTorch and diffusers.", @@ -368,11 +383,15 @@ def setup_cookbook_routes() -> APIRouter: encoding="utf-8", ) argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)] + env = os.environ.copy() + env["PYTHONUTF8"] = "1" + env["PYTHONIOENCODING"] = "utf-8" proc = subprocess.Popen( argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, + env=env, **detached_popen_kwargs(), ) pid_path.write_text(str(proc.pid), encoding="utf-8") @@ -432,12 +451,12 @@ def setup_cookbook_routes() -> APIRouter: # throughput. Retries set disable_hf_transfer to fall back to the plain, # slower-but-reliable downloader (resumes cleanly from the .incomplete files). # Use `python3 -m pip` not `pip` — macOS has no bare `pip` command. 
- lines.append("command -v hf >/dev/null 2>&1 || python3 -m pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || python3 -m pip install -q -U huggingface_hub 2>/dev/null") + lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', upgrade=True)}") if req.disable_hf_transfer: lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0") lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4") else: - lines.append("python3 -c 'import hf_transfer' 2>/dev/null || python3 -m pip install --user --break-system-packages -q hf_transfer 2>/dev/null || python3 -m pip install -q hf_transfer 2>/dev/null") + lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer')}") lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") @@ -531,12 +550,18 @@ def setup_cookbook_routes() -> APIRouter: ) # Ensure pip-user scripts (e.g. hf CLI installed via --user) are on PATH runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - # Install hf CLI + hf_transfer best-effort so future runs get the fast path. + # Install hf CLI + optional hf_transfer best-effort. Retries disable + # hf_transfer because the Rust parallel path is fast but has been + # flaky near the end of very large multi-file downloads. # Use --break-system-packages on PEP-668 systems (Arch, newer Debian) so it doesn't bail. 
- runner_lines.append("command -v hf >/dev/null 2>&1 || pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || pip install -q -U huggingface_hub 2>/dev/null") - runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null || pip install --user --break-system-packages -q hf_transfer 2>/dev/null || pip install -q hf_transfer 2>/dev/null") - runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") - runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") + runner_lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', python_cmd='pip', upgrade=True)}") + if req.disable_hf_transfer: + runner_lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0") + runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4") + else: + runner_lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer', python_cmd='pip')}") + runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") + runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") # Surface whether the HF token actually reached THIS server, so a gated # download's "not authorized" failure can be told apart from a missing # token (the token is masked — we only print applied / not-set). 
@@ -547,15 +572,19 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(f' {hf_cmd} < /dev/null') runner_lines.append('elif python3 -c "import huggingface_hub" 2>/dev/null; then') runner_lines.append(' echo "hf CLI not found, using Python huggingface_hub..."') - runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"') + runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"') runner_lines.append('else') runner_lines.append(' echo "Installing huggingface-hub and dependencies..."') runner_lines.append(' pip install --no-deps -q huggingface-hub 2>/dev/null') - runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null') - runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") - runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"') + if req.disable_hf_transfer: + runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests 2>/dev/null') + runner_lines.append(' export HF_HUB_ENABLE_HF_TRANSFER=0') + else: + runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null') + runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") + runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"') runner_lines.append('fi') - runner_lines.append('if [ $? 
-eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + runner_lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') runner_lines.append(f"rm -f {remote_runner}") runner_lines.append('exec "${SHELL:-/bin/bash}"') runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" @@ -586,11 +615,11 @@ def setup_cookbook_routes() -> APIRouter: # Detached path: no controlling TTY, so skip `< /dev/null` # (handled by Popen stdin=DEVNULL) and don't keep a shell open. lines.append(hf_cmd) - lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') else: # < /dev/null suppresses interactive "update available? [Y/n]" prompt lines.append(f"{hf_cmd} < /dev/null") - lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') lines.append(f"rm -f '{wrapper_script}'") lines.append('exec "${SHELL:-/bin/bash}"') wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -672,11 +701,14 @@ def setup_cookbook_routes() -> APIRouter: cwd=str(Path.home()), ) else: - # LOCAL scan: run the interpreter directly. `python3` isn't a thing on - # Windows (it's `python`/`py`), and shell single-quoting of the path - # doesn't survive cmd.exe — so resolve the interpreter and exec it - # with the script path as an argv element (no shell quoting needed). - local_py = ( + # LOCAL scan: use sys.executable (the venv Python Odysseus is already + # running under) — it's guaranteed real Python on all platforms. 
+ # Falling back to which_tool on Windows risks hitting the Microsoft + # Store stub alias for "python3"/"python", which prints + # "Python was not found; run without arguments to install from the + # Microsoft Store" and exits 9009, producing empty stdout and a + # JSON parse error. sys.executable bypasses PATH entirely. + local_py = sys.executable or ( which_tool("python3") or which_tool("python") or which_tool("py") or "python" ) @@ -714,6 +746,8 @@ def setup_cookbook_routes() -> APIRouter: entry["backend"] = m.get("backend") if m.get("is_ollama"): entry["is_ollama"] = True + if isinstance(m.get("gguf_files"), list): + entry["gguf_files"] = m["gguf_files"] models.append(entry) except Exception as e: logger.warning(f"Failed to parse cached models: {e}") @@ -775,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter: finally: db.close() + def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None: + """Register a freshly-served LLM as a model endpoint so it appears in the + model picker without a manual /setup step — the text-model sibling of + _auto_register_image_endpoint. + + Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's + llama-server, vLLM, SGLang, or Ollama) on a known port. We point an + endpoint at that server's /v1; the picker auto-discovers the model id by + probing /v1/models and dims the endpoint until the server is reachable, + so registering immediately (before the server finishes loading) is safe. + """ + import re + from core.database import SessionLocal, ModelEndpoint + + # Port: an explicit --port wins. Otherwise fall back by backend — Ollama + # is the only server in our generated commands that omits --port. 
+ port_match = re.search(r'--port\s+(\d+)', req.cmd) + if port_match: + port = int(port_match.group(1)) + elif "ollama" in req.cmd: + port = 11434 + else: + port = 8080 # llama.cpp's llama-server default — the Apple Silicon path + + # Determine host (mirrors the image path: SSH alias for remote serves). + if remote: + host = remote.split("@")[-1] if "@" in remote else remote + else: + host = "localhost" + + base_url = f"http://{host}:{port}/v1" + + short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id + display_name = short_name or "Local model" + + # If the serve command opts models into OpenAI tool-calling, record it so + # agent_loop trusts emitted tool_calls instead of the name heuristic. + supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None + + db = SessionLocal() + try: + # Reuse an endpoint already pointed at this URL instead of duplicating. + existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first() + if existing: + existing.is_enabled = True + existing.model_type = "llm" + existing.name = display_name + if supports_tools is not None: + existing.supports_tools = supports_tools + db.commit() + logger.info(f"Updated existing local model endpoint: {base_url}") + return existing.id + + ep_id = f"local-{uuid.uuid4().hex[:8]}" + ep = ModelEndpoint( + id=ep_id, + name=display_name, + base_url=base_url, + api_key=None, + is_enabled=True, + model_type="llm", + supports_tools=supports_tools, + ) + db.add(ep) + db.commit() + logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}") + return ep_id + except Exception as e: + logger.error(f"Failed to auto-register local model endpoint: {e}") + db.rollback() + return None + finally: + db.close() + @router.post("/api/model/serve") async def model_serve(request: Request, req: ServeRequest): """Launch a model server in a tmux session (or PowerShell background process on Windows). 
@@ -800,8 +908,17 @@ def setup_cookbook_routes() -> APIRouter: # many downstream `"engine" in req.cmd` membership checks can't hit # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400). req.cmd = _validate_serve_cmd(req.cmd) or "" + req.cmd = _venv_safe_local_pip_install_cmd( + req.cmd, + local=not bool(req.remote_host), + in_venv=sys.prefix != sys.base_prefix, + ) is_pip_install = bool(req.cmd and "pip install" in req.cmd) if is_pip_install: + # Keep big dependency wheel builds (vLLM, …) off the home filesystem's + # pip cache so they don't fail mid-build with "No space left" (#1219) + # and leave the dep installed-but-unusable (#1459). + req.cmd = _pip_install_no_cache(req.cmd) # PEP-508-style package spec — letters, digits, `.-_` for the # name; `[` `]` for extras; `<>=!~,` for version specifiers. # v2 review HIGH-14: tightened from the previous regex which @@ -922,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then') runner_lines.append(' pkg install -y cmake 2>/dev/null') runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') - runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true') + runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true') runner_lines.append(' fi') runner_lines.append('elif ! 
command -v llama-server &>/dev/null; then') runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') @@ -944,61 +1061,45 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') runner_lines.append(' else') - # Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put - # it on PATH so cmake's CUDA configure can find it. We check the - # same three layouts as entrypoint.sh: - # nvidia/cu13 — nvidia-nvcc-cu13 - # nvidia/cu12 — nvidia-nvcc-cu12 - # nvidia/cuda_nvcc — nvidia-cuda-nvcc-cu12 (sub-package style) - runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do') - runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break') - runner_lines.append(' done') - # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a - # failed CUDA attempt) doesn't cause the next configure to reuse - # stale settings and silently produce a CPU-only binary. 
- runner_lines.append(' cd ~/llama.cpp && rm -rf build') - runner_lines.append(' if command -v nvcc &>/dev/null; then') - runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."') - runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\') - runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') - runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') - runner_lines.append(' else') - runner_lines.append(' echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."') - runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') - runner_lines.append(' echo "[odysseus] To get a GPU build, first install vLLM via Cookbook -> Dependencies"') - runner_lines.append(' echo "[odysseus] (its CUDA wheels include nvcc), then re-launch this serve task."') - runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release \\') - runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') - runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') - runner_lines.append(' fi') + _append_llama_cpp_linux_accel_build_lines(runner_lines) runner_lines.append(' fi') runner_lines.append(' # If the native build failed, fall back to the Python bindings.') runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') - runner_lines.append(' pip install --user --break-system-packages -q llama-cpp-python 2>/dev/null || pip install -q llama-cpp-python 2>/dev/null || true') + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true") + runner_lines.append(' fi') + runner_lines.append(' if ! command -v llama-server &>/dev/null && ! 
python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append(' fi') runner_lines.append('fi') elif "ollama" in req.cmd: handled_ollama_serve = True - _ollama_port = "11434" - _ollama_match = re.search(r"OLLAMA_HOST=[^\s:]+:(\d+)", req.cmd) - if _ollama_match: - _ollama_port = _ollama_match.group(1) + _ollama_default_host = "0.0.0.0" if remote else "127.0.0.1" + _ollama_host, _ollama_port = _ollama_bind_from_cmd( + req.cmd, + default_host=_ollama_default_host, + ) # Ollama can be a host binary, a system service, or a Docker # container. If the HTTP API is already reachable, the model is # already served and we should not require a host `ollama` CLI. + runner_lines.append(f'ODYSSEUS_OLLAMA_HOST={_bash_squote(_ollama_host)}') runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"') runner_lines.append('ODYSSEUS_OLLAMA_URL=""') - runner_lines.append('for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do') - runner_lines.append(' [ -z "$_ody_ollama_port" ] && continue') - runner_lines.append(' for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do') - runner_lines.append(' _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"') - runner_lines.append(' if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then') - runner_lines.append(' ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"') - runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"') - runner_lines.append(' break 2') - runner_lines.append(' fi') + runner_lines.append('for _ody_ollama_try in $(seq 1 20); do') + runner_lines.append(' for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do') + runner_lines.append(' [ -z "$_ody_ollama_port" ] && continue') + runner_lines.append(' for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do') + runner_lines.append(' 
_ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"') + runner_lines.append(' if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then') + runner_lines.append(' ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"') + runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"') + runner_lines.append(' break 3') + runner_lines.append(' fi') + runner_lines.append(' done') runner_lines.append(' done') + runner_lines.append(' [ "$_ody_ollama_try" -eq 1 ] && echo "[odysseus] Waiting for an existing Ollama API on ports ${ODYSSEUS_OLLAMA_PORT}/11434..."') + runner_lines.append(' sleep 1') runner_lines.append('done') runner_lines.append('if [ -n "$ODYSSEUS_OLLAMA_URL" ]; then') runner_lines.append(' if [ "$ODYSSEUS_OLLAMA_PORT" != "' + _ollama_port + '" ]; then') @@ -1015,8 +1116,12 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' echo "=== Process exited with code 127 ==="') runner_lines.append(' exec bash -i') runner_lines.append('fi') - runner_lines.append('echo "Starting ollama server on 0.0.0.0:${ODYSSEUS_OLLAMA_PORT}..."') - runner_lines.append('OLLAMA_HOST="0.0.0.0:${ODYSSEUS_OLLAMA_PORT}" ollama serve') + runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"') + if remote and _ollama_host in ("0.0.0.0", "::"): + runner_lines.append('echo "[odysseus] WARNING: remote Ollama will bind to ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT} so Odysseus can reach it from this host."') + runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."') + runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."') + runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve') runner_lines.append('_ody_exit=$?') runner_lines.append('echo') runner_lines.append('echo "=== Process exited with code ${_ody_exit} 
==="') @@ -1032,19 +1137,24 @@ def setup_cookbook_routes() -> APIRouter: # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above. runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('if ! command -v vllm &>/dev/null; then') - runner_lines.append(' echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."') + runner_lines.append(' echo "ERROR: vLLM is not installed."') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') elif "sglang.launch_server" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then') - runner_lines.append(' echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."') + runner_lines.append('if ! command -v sglang &>/dev/null; then') + runner_lines.append(' echo "ERROR: SGLang is not installed."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then') + runner_lines.append(' echo "ERROR: SGLang is installed but failed to import."') + runner_lines.append(' printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then') - runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."') + runner_lines.append('if ! 
ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then') + runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers."') + runner_lines.append(' printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') @@ -1116,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter: stderr = (await proc.stderr.read()).decode(errors="replace") return {"ok": False, "error": stderr, "session_id": session_id} - # Auto-register as model endpoint if serving a diffusion model + # Auto-register a model endpoint so the served model shows up in the model + # picker with no manual /setup step. Diffusion models get an image + # endpoint; any other real model serve (i.e. not a pip-install task) gets + # a local LLM endpoint pointed at its /v1. endpoint_id = None is_diffusion = "diffusion_server.py" in req.cmd if is_diffusion: endpoint_id = _auto_register_image_endpoint(req, remote) + elif not is_pip_install: + endpoint_id = _auto_register_llm_endpoint(req, remote) # Log to assistant try: @@ -1357,9 +1472,16 @@ def setup_cookbook_routes() -> APIRouter: total_mb = max(0, int(total_bytes / (1024 * 1024))) used_mb = max(0, min(total_mb, int(used_bytes / (1024 * 1024)))) free_mb = max(0, total_mb - used_mb) + # GTT = the system-RAM pool the GPU pages into when VRAM is full. + # On a discrete card a large gtt_used means the model spilled past + # VRAM into RAM over PCIe — much slower. Surface it so the UI can + # warn "spilling to RAM" instead of the user wondering why it's slow. 
+ gtt_used_raw = await _gpu_read_file(f"{base}/mem_info_gtt_used", host, ssh_port) + gtt_used_mb = max(0, int(int(gtt_used_raw) / (1024 * 1024))) if (gtt_used_raw and gtt_used_raw.isdigit()) else 0 gpus.append({ "index": len(gpus), "name": name, "uuid": entry, "free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb, + "gtt_used_mb": gtt_used_mb, "util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85), "processes": [], "backend": "rocm", "source": "amd-sysfs", "unified_memory": unified, @@ -1461,6 +1583,46 @@ def setup_cookbook_routes() -> APIRouter: if gpus: return {"ok": True, "gpus": gpus, "backend": "cuda", "source": "nvidia-smi"} + # Local Apple Silicon / Metal fallback. macOS has no nvidia-smi and no + # Linux /sys/class/drm tree, but services.hwfit.hardware already knows + # how to size the shared unified-memory GPU budget. Keep this route in + # sync so Cookbook's GPU picker doesn't show "nvidia-smi not found" on + # native Mac launches. + if not host and sys.platform == "darwin": + try: + from services.hwfit.hardware import detect_system + info = detect_system(fresh=True) + backend = str(info.get("backend") or "").lower() + if backend in {"metal", "mps", "apple"} and info.get("gpu_count", 0) > 0: + total_mb = int(float(info.get("gpu_vram_gb") or info.get("total_ram_gb") or 0) * 1024) + free_mb = int(float(info.get("available_ram_gb") or 0) * 1024) + if total_mb and (free_mb <= 0 or free_mb > total_mb): + free_mb = total_mb + used_mb = max(0, total_mb - max(0, free_mb)) + return { + "ok": True, + "gpus": [{ + "index": 0, + "name": info.get("gpu_name") or info.get("cpu_name") or "Apple Silicon GPU", + "uuid": "apple-metal-0", + "free_mb": max(0, free_mb), + "total_mb": max(0, total_mb), + "used_mb": used_mb, + "util_pct": 0, + "busy": bool(total_mb and (free_mb / total_mb) < 0.5), + "processes": [], + "backend": "metal", + "source": "apple-metal", + "unified_memory": True, + }], + "backend": "metal", + "source": "apple-metal", + 
"fallback_from": "nvidia-smi", + "nvidia_error": nvidia_error, + } + except Exception as e: + logger.warning("Apple Metal GPU fallback failed: %s", e) + amd_gpus = await _probe_amd_sysfs(host, ssh_port) if amd_gpus: return { @@ -1607,6 +1769,33 @@ def setup_cookbook_routes() -> APIRouter: disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else [] incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else [] + # Anti-poisoning guard: a stale browser tab can keep POSTing a + # download task as status='done' from before the strict-finish + # fix landed, undoing any server-side correction. For each + # incoming "done" download, override to "running" if the last + # shard pattern says N _completed: + logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} " + f"({_completed}/{_starts} files complete, no DOWNLOAD_OK)") + _it["status"] = "running" incoming_ids = {t.get("sessionId") for t in incoming_tasks if isinstance(t, dict) and t.get("sessionId")} import time as _t now_ms = int(_t.time() * 1000) @@ -1763,6 +1952,43 @@ def setup_cookbook_routes() -> APIRouter: def _cookbook_tasks_status_sync(): import subprocess + def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool: + """Best-effort check for a completed HF cache entry. + + tmux output can stop at a stale progress line if the pane/session + disappears before Cookbook captures the final DOWNLOAD_OK marker. + In that case, trust the cache shape: a snapshot directory with files + and no *.incomplete blobs means HuggingFace finished materializing the + model. 
+ """ + if not repo_id or "/" not in repo_id: + return False + py = ( + "import os,sys;" + "repo=sys.argv[1];" + "base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');" + "d=os.path.join(base,'models--'+repo.replace('/','--'));" + "snap=os.path.join(d,'snapshots');" + "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));" + "inc=False;" + "blobs=os.path.join(d,'blobs');" + "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));" + "sys.exit(0 if ok and not inc else 1)" + ) + cmd = ["python3", "-c", py, repo_id] + try: + if remote_host: + ssh_base = ["ssh"] + if ssh_port and ssh_port != "22": + ssh_base.extend(["-p", str(ssh_port)]) + shell_cmd = " ".join(shlex.quote(x) for x in cmd) + proc = subprocess.run(ssh_base + [remote_host, shell_cmd], timeout=12, capture_output=True) + else: + proc = subprocess.run(cmd, timeout=12, capture_output=True) + return proc.returncode == 0 + except Exception: + return False + # Load saved tasks from cookbook state tasks = [] if _cookbook_state_path.exists(): @@ -1902,14 +2128,21 @@ def setup_cookbook_routes() -> APIRouter: # persists after the process exits, so a finished download still has a # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even # when the PID is gone instead of blindly reporting "stopped". 
+ download_zero_files = False status = "unknown" if is_alive or (local_win_task and full_snapshot): lower = full_snapshot.lower() - has_exit = "=== process exited with code" in lower + exit_match = re.search(r"=== process exited with code\s+(-?\d+)", full_snapshot, re.I) + has_exit = exit_match is not None + exit_code = int(exit_match.group(1)) if exit_match else None has_error = "error" in lower or "failed" in lower or "traceback" in lower if has_exit and task_type == "serve": # Serve tasks that exit are always errors — they should run indefinitely status = "error" + elif has_exit and task_type == "download": + # Dependency installs are tracked as download tasks but only + # emit the generic runner exit marker, not HF download markers. + status = "completed" if exit_code == 0 else "error" elif has_exit and "unrecognized arguments" in lower: status = "error" elif has_error and not ("application startup complete" in lower): @@ -1918,7 +2151,11 @@ def setup_cookbook_routes() -> APIRouter: # Only download tasks treat 100% as "completed". # Serve tasks log 100%|██████| during inference progress # (diffusion sampling, etc.) — that's "running", not done. 
- status = "completed" + if re.search(r"Fetching\s+0\s+files", full_snapshot, re.IGNORECASE): + status = "error" + download_zero_files = True + else: + status = "completed" elif "application startup complete" in lower: status = "ready" elif not is_alive: @@ -1928,7 +2165,14 @@ def setup_cookbook_routes() -> APIRouter: status = "running" else: # Session is dead — check if it completed or crashed - status = "stopped" + if task_type == "download" and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or "")): + status = "completed" + if not progress_text: + progress_text = "Download complete" + if not full_snapshot: + full_snapshot = "DOWNLOAD_OK" + else: + status = "stopped" # Parse structured phase info — single source of truth for the UI phase_info = _parse_serve_phase(full_snapshot, task_type) if (task_type == "serve" and status == "running" and full_snapshot) else {} @@ -1938,6 +2182,8 @@ def setup_cookbook_routes() -> APIRouter: diagnosis = _diagnose_serve_output(full_snapshot) if task_type == "serve" and full_snapshot else None if diagnosis and status in {"running", "unknown", "stopped"}: status = "error" + if download_zero_files: + diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). 
Check the repo and the include/quant pattern."} output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else "" results.append({ diff --git a/routes/document_helpers.py b/routes/document_helpers.py index ebfb1772c..57acc50e7 100644 --- a/routes/document_helpers.py +++ b/routes/document_helpers.py @@ -152,7 +152,7 @@ def _resolve_user_upload_path( owner=owner, auth_manager=auth_manager, ) - if not resolved: + if not isinstance(resolved, dict) or not resolved: return None path = resolved.get("path") upload_dir = getattr(upload_handler, "upload_dir", None) @@ -203,6 +203,8 @@ def _assert_pdf_marker_upload_owned( def _derive_title(content: str) -> str: """Derive a title from document content.""" import re + if not isinstance(content, str): + return "Untitled" text = content.strip() if not text: return "Untitled" diff --git a/routes/document_routes.py b/routes/document_routes.py index 7d65ed31d..5625df88c 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -15,6 +15,21 @@ from src.auth_helpers import get_current_user logger = logging.getLogger(__name__) +def _aggregate_language_facets(lang_rows): + """Sum document counts per display language for the library facet. + + NULL-language and explicit "text" rows share the "text" bucket (the + language filter treats them as one), so they must be ADDED. The old dict + comprehension keyed both to "text", silently overwriting one group and + undercounting the facet versus what the filter actually returns. 
+ """ + out = {} + for lang, cnt in lang_rows: + key = lang or "text" + out[key] = out.get(key, 0) + cnt + return out + + from routes.document_helpers import ( DocumentCreate, DocumentUpdate, DocumentPatch, @@ -145,7 +160,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: create_form_markdown_document, create_plain_pdf_document, ) - from src.document_processor import _process_pdf + from src.document_processor import _process_pdf, strip_pdf_content_marker import os from src.auth_helpers import require_privilege @@ -184,7 +199,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0] try: - body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip() + body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) except Exception: body_text = None @@ -258,7 +273,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: ) lang_q = _owner_session_filter(lang_q, user) lang_rows = lang_q.group_by(Document.language).all() - languages = {lang or "text": cnt for lang, cnt in lang_rows} + languages = _aggregate_language_facets(lang_rows) # Session count (owner-filtered) sc_q = ( @@ -402,7 +417,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: text extraction was wired, plus for scanned/image-only PDFs where the VL model picks up text the basic pypdf path missed.""" import re - from src.document_processor import _process_pdf + from src.document_processor import _process_pdf, strip_pdf_content_marker from src.pdf_form_doc import find_source_upload_id user = get_current_user(request) @@ -423,7 +438,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: raise HTTPException(404, "Source PDF could not be located") try: - body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip() + body_text = 
strip_pdf_content_marker(_process_pdf(pdf_path)) except Exception as e: logger.error(f"extract_pdf_text failed for {pdf_path}: {e}") raise HTTPException(500, f"Extraction failed: {e}") @@ -593,6 +608,15 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: if req.session_id is not None: # Empty string = unlink from session doc.session_id = req.session_id if req.session_id else None + if not req.session_id: + # Tab closed / doc detached from its session — drop the + # in-memory active-doc pointer so the last-resort injection + # path doesn't re-surface this doc in a later chat (#1160). + try: + from src.tool_implementations import clear_active_document + clear_active_document(doc_id) + except Exception: + pass db.commit() db.refresh(doc) return _doc_to_dict(doc) @@ -615,6 +639,13 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: raise HTTPException(404, "Document not found") _verify_doc_owner(db, doc, user) doc.is_active = False + # Closed/deleted — drop the in-memory active-doc pointer so it isn't + # re-injected into a later, unrelated chat (#1160). 
+ try: + from src.tool_implementations import clear_active_document + clear_active_document(doc_id) + except Exception: + pass db.commit() return {"status": "deleted", "id": doc_id} except HTTPException: @@ -885,7 +916,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: for i, doc in enumerate(batch): if i >= len(verdicts): break - verdict = verdicts[i].lower().strip() + verdict = str(verdicts[i] or "").lower().strip() if verdict == "junk": doc.tidy_verdict = "junk" db.delete(doc) diff --git a/routes/editor_draft_routes.py b/routes/editor_draft_routes.py index 3c284392b..02641a577 100644 --- a/routes/editor_draft_routes.py +++ b/routes/editor_draft_routes.py @@ -67,6 +67,14 @@ def _summary(d: EditorDraft) -> Dict[str, Any]: } +def _load_payload(raw: Optional[str]) -> Dict[str, Any]: + try: + payload = json.loads(raw) if raw else {} + except Exception: + return {} + return payload if isinstance(payload, dict) else {} + + def setup_editor_draft_routes() -> APIRouter: router = APIRouter(tags=["editor-drafts"]) @@ -93,13 +101,9 @@ def setup_editor_draft_routes() -> APIRouter: ).first() if not d or not _owns(d, user): raise HTTPException(404, "Draft not found") - try: - payload = json.loads(d.payload) if d.payload else {} - except Exception: - payload = {} return { **_summary(d), - "payload": payload, + "payload": _load_payload(d.payload), } finally: db.close() diff --git a/routes/email_helpers.py b/routes/email_helpers.py index c14fd8c1d..409c6c4b7 100644 --- a/routes/email_helpers.py +++ b/routes/email_helpers.py @@ -15,7 +15,6 @@ and `email_pollers.py` (the background loops): import os import imaplib import smtplib -import ssl import email as email_mod import email.header import email.utils @@ -33,47 +32,43 @@ from fastapi import Query, HTTPException, Request from pydantic import BaseModel from typing import Optional, List -from src.auth_helpers import get_current_user +from src.auth_helpers import _auth_disabled, get_current_user from 
src.secret_storage import decrypt as _decrypt logger = logging.getLogger(__name__) -def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None: - """Send through SMTP using the conventional TLS mode for the configured port. +def _smtp_security_mode(cfg: dict) -> str: + raw = str(cfg.get("smtp_security") or "").strip().lower() + if raw in {"ssl", "starttls", "none"}: + return raw + port = int(cfg.get("smtp_port") or 465) + if port == 587: + return "starttls" + return "ssl" - Account settings only store host/port today. Port 465 is implicit TLS - (SMTP_SSL); port 587 is plain SMTP upgraded with STARTTLS. Using SSL - directly against 587 raises the classic "[SSL: WRONG_VERSION_NUMBER]" - error even when credentials are correct. - """ + +def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None: + """Send through SMTP using the configured transport security mode.""" host = cfg["smtp_host"] port = int(cfg.get("smtp_port") or 465) user = cfg.get("smtp_user") or "" password = cfg.get("smtp_password") or "" - def _send_starttls(starttls_port: int = 587) -> None: - with smtplib.SMTP(host, starttls_port, timeout=timeout) as smtp: - smtp.starttls() - if user and password: - smtp.login(user, password) - smtp.sendmail(from_addr, recipients, message) + security = _smtp_security_mode(cfg) - if port == 587: - _send_starttls(587) - return - - try: + if security == "ssl": with smtplib.SMTP_SSL(host, port, timeout=timeout) as smtp: if user and password: smtp.login(user, password) smtp.sendmail(from_addr, recipients, message) return - except (TimeoutError, ssl.SSLError) as e: - if port == 465: - logger.warning("SMTP implicit TLS on %s:465 failed (%s); retrying STARTTLS on 587", host, e) - _send_starttls(587) - return - raise + + with smtplib.SMTP(host, port, timeout=timeout) as smtp: + if security == "starttls": + smtp.starttls() + if user and password: + 
smtp.login(user, password) + smtp.sendmail(from_addr, recipients, message) def _strip_think(text: str) -> str: @@ -152,6 +147,8 @@ def _require_auth(request: Request) -> str: u = get_current_user(request) if u: return u + if _auth_disabled(): + return "" auth_mgr = getattr(request.app.state, "auth_manager", None) if auth_mgr is not None and getattr(auth_mgr, "is_configured", False): raise HTTPException(401, "Not authenticated") @@ -300,7 +297,8 @@ def _init_scheduled_db(): send_at TEXT NOT NULL, created_at TEXT NOT NULL, status TEXT NOT NULL DEFAULT 'pending', - error TEXT + error TEXT, + owner TEXT DEFAULT '' ) """) # Email summary cache (keyed by Message-ID) @@ -438,6 +436,35 @@ def _init_scheduled_db(): conn.execute("ALTER TABLE scheduled_emails ADD COLUMN account_id TEXT") if "odysseus_kind" not in cols: conn.execute("ALTER TABLE scheduled_emails ADD COLUMN odysseus_kind TEXT") + if "owner" not in cols: + conn.execute("ALTER TABLE scheduled_emails ADD COLUMN owner TEXT DEFAULT ''") + conn.execute("CREATE INDEX IF NOT EXISTS ix_scheduled_emails_owner_status ON scheduled_emails(owner, status)") + # Backfill owner on legacy rows from the owning email account so the + # owner-scoped list/cancel routes surface pre-migration scheduled + # sends to the right user (the poller already resolves these by + # account at send time; this aligns the UI with that). + legacy_accounts = conn.execute( + "SELECT DISTINCT account_id FROM scheduled_emails " + "WHERE (owner IS NULL OR owner = '') AND account_id IS NOT NULL AND account_id != ''" + ).fetchall() + if legacy_accounts: + try: + from core.database import SessionLocal as _SL, EmailAccount as _EA + _db = _SL() + try: + for (acct_id,) in legacy_accounts: + row = _db.query(_EA.owner).filter(_EA.id == acct_id).first() + acct_owner = (row[0] or "") if row else "" + if acct_owner: + conn.execute( + "UPDATE scheduled_emails SET owner = ? " + "WHERE account_id = ? 
AND (owner IS NULL OR owner = '')", + (acct_owner, acct_id), + ) + finally: + _db.close() + except Exception: + pass except Exception: pass # Lazy migration: add turns_json to email_boundaries for server-side @@ -541,6 +568,7 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict: "account_name": row.name, "smtp_host": row.smtp_host or "", "smtp_port": int(row.smtp_port or 465), + "smtp_security": _smtp_security_mode({"smtp_security": getattr(row, "smtp_security", ""), "smtp_port": row.smtp_port}), "smtp_user": row.smtp_user or "", "smtp_password": _decrypt(row.smtp_password or ""), "imap_host": row.imap_host or "", @@ -567,6 +595,10 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict: "account_name": "legacy", "smtp_host": settings.get("smtp_host", os.environ.get("SMTP_HOST", "")), "smtp_port": int(settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")) or 465), + "smtp_security": _smtp_security_mode({ + "smtp_security": settings.get("smtp_security", os.environ.get("SMTP_SECURITY", "")), + "smtp_port": settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")), + }), "smtp_user": settings.get("smtp_user", os.environ.get("SMTP_USER", "")), "smtp_password": settings.get("smtp_password", os.environ.get("SMTP_PASSWORD", "")), "imap_host": settings.get("imap_host", os.environ.get("IMAP_HOST", "")), @@ -606,7 +638,32 @@ def _list_email_accounts() -> list[dict]: # ── IMAP helpers ── -_IMAP_TIMEOUT_SECONDS = 15 +def _coerce_imap_timeout_seconds(raw: str | None) -> int: + try: + value = int(raw or "30") + except (TypeError, ValueError): + value = 30 + return max(5, min(value, 300)) + + +_IMAP_TIMEOUT_SECONDS = _coerce_imap_timeout_seconds(os.environ.get("ODYSSEUS_IMAP_TIMEOUT_SECONDS")) + + +def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int = _IMAP_TIMEOUT_SECONDS): + """Open an IMAP connection using the configured security mode.""" + port = int(port or 993) + if starttls: + conn 
= imaplib.IMAP4(host, port, timeout=timeout) + conn.starttls() + elif port == 993: + conn = imaplib.IMAP4_SSL(host, port, timeout=timeout) + else: + conn = imaplib.IMAP4(host, port, timeout=timeout) + try: + conn.sock.settimeout(timeout) + except Exception: + pass + return conn def _imap_connect(account_id: str | None = None, owner: str = ""): # SECURITY: passing `owner` scopes the fallback config lookup so a brand @@ -620,17 +677,12 @@ def _imap_connect(account_id: str | None = None, owner: str = ""): # The last branch is critical: previously this fell into IMAP4_SSL # for any non-STARTTLS port, which would fail the TLS handshake on # plain local servers (Dovecot on 31143, etc.). - if cfg.get("imap_starttls"): - conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS) - conn.starttls() - elif int(cfg.get("imap_port") or 993) == 993: - conn = imaplib.IMAP4_SSL(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS) - else: - conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS) - try: - conn.sock.settimeout(_IMAP_TIMEOUT_SECONDS) - except Exception: - pass + conn = _open_imap_connection( + cfg["imap_host"], + cfg["imap_port"], + starttls=bool(cfg.get("imap_starttls")), + timeout=_IMAP_TIMEOUT_SECONDS, + ) conn.login(cfg["imap_user"], cfg["imap_password"]) return conn @@ -699,7 +751,13 @@ def _decode_header(raw): decoded = [] for data, charset in parts: if isinstance(data, bytes): - decoded.append(data.decode(charset or "utf-8", errors="replace")) + try: + decoded.append(data.decode(charset or "utf-8", errors="replace")) + except (LookupError, ValueError): + # Unknown/invalid MIME charset (e.g. a malformed or spam header + # like =?x-unknown-charset?B?...?=). errors="replace" only covers + # byte-decode errors, not codec lookup, so fall back to utf-8. 
+ decoded.append(data.decode("utf-8", errors="replace")) else: decoded.append(data) return " ".join(decoded) @@ -793,22 +851,27 @@ def _detect_spam_folder(conn): return None -def _imap_move(uid, dest, src="INBOX"): +def _imap_move(uid, dest, src="INBOX", account_id: str | None = None, owner: str = ""): """Move a single IMAP UID from src folder to dest. Returns True on success.""" + c = None try: - c = _imap_connect() + c = _imap_connect(account_id, owner=owner) c.select(_q(src)) status, _ = c.copy(uid, _q(dest)) if status != "OK": - c.logout() return False c.store(uid, "+FLAGS", "\\Deleted") c.expunge() - c.logout() return True except Exception as e: logger.warning(f"IMAP move {uid} → {dest} failed: {e}") return False + finally: + if c: + try: + c.logout() + except Exception: + pass def _extract_attachment_text(msg, max_chars: int = 6000) -> str: @@ -999,7 +1062,9 @@ def _fetch_sender_thread_context(sender_addr: str, exclude_folder: str = "INBOX", limit: int = 3, max_chars_per_email: int = 1500, - max_attachment_chars: int = 4000) -> str: + max_attachment_chars: int = 4000, + account_id: str | None = None, + owner: str = "") -> str: """Pull the last N emails from `sender_addr` (across common folders), extract their body snippets + attachment text, and return one formatted block ready to be glued into an LLM system prompt as "REFERENCED MATERIAL". 
@@ -1021,7 +1086,7 @@ def _fetch_sender_thread_context(sender_addr: str, seen_uids.add((exclude_folder or "INBOX", str(exclude_uid))) try: - conn = _imap_connect() + conn = _imap_connect(account_id, owner=owner) except Exception as e: logger.warning(f"sender-thread-context: imap connect failed: {e}") return "" @@ -1104,7 +1169,12 @@ def _fetch_sender_thread_context(sender_addr: str, return "\n\n=====\n\n".join(blocks) -def _pre_retrieve_context(body: str, sender: str) -> tuple: +def _pre_retrieve_context( + body: str, + sender: str, + account_id: str | None = None, + owner: str = "", +) -> tuple: """Extract key terms from an incoming email and search past emails + contacts. Returns (context_snippets, terms_list). Best-effort; never raises. @@ -1128,18 +1198,37 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple: # ── Known-sender check: only retrieve context for senders we already # have a relationship with. New / cold senders get an empty context. sender_addr = email.utils.parseaddr(sender or "")[1].lower() - is_known = False + # The CardDAV address book is global admin data backed by a single + # Radicale instance, so only fold it into reply context for an admin / + # single-user owner. Non-admin owners still get their own (owner-scoped) + # IMAP history below, just not the shared contacts. try: - from routes.contacts_routes import _fetch_contacts - for c in _fetch_contacts() or []: - if (c.get("email") or "").lower() == sender_addr: - is_known = True - break + from src.tool_security import owner_is_admin_or_single_user + contacts_allowed = owner_is_admin_or_single_user(owner or None) except Exception: - pass + contacts_allowed = not bool(owner) + is_known = False + if contacts_allowed: + try: + from routes.contacts_routes import _fetch_contacts + for c in _fetch_contacts() or []: + # Contacts are normalized to plural `emails` lists, but + # keep the legacy singular key fallback for older data. 
+ contact_emails = [] + raw_emails = c.get("emails") + if isinstance(raw_emails, list): + contact_emails.extend(str(e or "") for e in raw_emails) + legacy_email = c.get("email") + if legacy_email: + contact_emails.append(str(legacy_email)) + if any((addr or "").strip().lower() == sender_addr for addr in contact_emails): + is_known = True + break + except Exception: + pass if not is_known and sender_addr: try: - with _imap() as _ck: + with _imap(account_id, owner=owner) as _ck: _ck.select("INBOX", readonly=True) st_known, dk = _ck.search(None, f'(FROM "{sender_addr}")') if st_known == "OK" and dk and dk[0]: @@ -1177,7 +1266,7 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple: return context_snippets, terms_list try: - ctx_conn = _imap_connect() + ctx_conn = _imap_connect(account_id, owner=owner) for folder in ["INBOX", "Sent", "Archive", "Drafts"]: try: st_sel, _sd = ctx_conn.select(_q(folder), readonly=True) @@ -1221,18 +1310,18 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple: try: from routes.contacts_routes import _fetch_contacts - all_contacts = _fetch_contacts() + all_contacts = _fetch_contacts() if contacts_allowed else [] for term in terms_list: t_lower = term.lower() matches = [c for c in all_contacts if t_lower in (c.get("name") or "").lower() - or t_lower in (c.get("email") or "").lower()] + or any(t_lower in (e or "").lower() for e in (c.get("emails") or []))] for c in matches[:2]: parts = [f"Name: {c.get('name','')}"] - if c.get("email"): - parts.append(f"Email: {c['email']}") - if c.get("phone"): - parts.append(f"Phone: {c['phone']}") + if c.get("emails"): + parts.append(f"Email: {', '.join(c['emails'])}") + if c.get("phones"): + parts.append(f"Phone: {', '.join(c['phones'])}") context_snippets.append(f"[Contact match for \"{term}\"] " + ", ".join(parts)) except Exception: pass diff --git a/routes/email_pollers.py b/routes/email_pollers.py index ec8b1e18c..529ba00c1 100644 --- a/routes/email_pollers.py +++ 
b/routes/email_pollers.py @@ -45,6 +45,21 @@ from routes.email_helpers import ( logger = logging.getLogger(__name__) +def _owner_for_email_account(account_id: str | None) -> str: + if not account_id: + return "" + try: + from core.database import SessionLocal as _SL, EmailAccount as _EA + db = _SL() + try: + row = db.query(_EA.owner).filter(_EA.id == account_id).first() + return (row[0] or "") if row else "" + finally: + db.close() + except Exception: + return "" + + # ── Routes ── async def _emit_progress(progress_cb, message: str): @@ -84,6 +99,36 @@ async def _run_auto_summarize_once(do_summary: bool = True, do_reply: bool = Tru _save_settings(s2) +def _latest_inbox_fallback_uids(conn, reconnect): + """Latest INBOX UIDs via ``SEARCH ALL``, with a poisoned-socket guard (#1613). + + On a large Gmail mailbox the fallback ``SEARCH ALL`` can time out mid-reply, + leaving its enormous ``* SEARCH `` line unread on the socket. The next + command (the downstream re-select / EXAMINE) then reads those leftover bytes + and fails with ``EXAMINE => unexpected response: b'325188 …'``. Reconnecting + on failure guarantees the downstream command starts from a clean socket. + + Returns ``(uids, conn)`` — ``conn`` is the live connection to keep using: the + same one on success, a fresh one (via ``reconnect()``) if we had to recover. + """ + try: + conn.select("INBOX", readonly=True) + status, data = conn.uid("SEARCH", None, "ALL") + uids = [] + if status == "OK" and data and data[0]: + for u in reversed(data[0].split()[-8:]): + uids.append(("INBOX", u)) + logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages") + return uids, conn + except Exception as _e: + logger.warning(f"Latest-INBOX fallback scan failed: {_e}") + try: + conn.logout() + except Exception: + pass + return [], reconnect() + + async def _auto_summarize_pass(days_back: int = 1, account_id: str | None = None, progress_cb=None) -> str: """Single pass of the auto-summarize/reply scan. 
@@ -132,7 +177,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None import sqlite3 as _sql3 import requests as _req from src.endpoint_resolver import resolve_endpoint - from src.llm_core import _uses_max_completion_tokens + from src.llm_core import _uses_max_completion_tokens, _restricts_temperature settings = _load_settings() auto_sum = settings.get("email_auto_summarize", False) @@ -143,25 +188,18 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None if not auto_sum and not auto_reply and not auto_tag and not auto_spam and not auto_cal: return "Nothing to do" - # Owner of the account being processed. All calendar reads/writes below are - # scoped to this user: the multi-account fan-out runs every user's mailbox, - # so an unscoped pass would disclose and mutate other tenants' calendars. - _acct_owner = None - try: - from core.database import SessionLocal as _SLo, EmailAccount as _EAo - _dbo = _SLo() - try: - if account_id: - _arow = _dbo.query(_EAo).filter(_EAo.id == account_id).first() - _acct_owner = _arow.owner if _arow else None - finally: - _dbo.close() - except Exception: - _acct_owner = None + # Owner of the account being processed. All calendar + mailbox reads/writes + # below are scoped to this user: the multi-account fan-out runs every user's + # mailbox, so an unscoped pass would disclose/mutate other tenants' data. + # One resolution feeds both the mailbox path (account_owner) and upstream's + # calendar path (_acct_owner, which expects None rather than ""). 
+ account_owner = _owner_for_email_account(account_id) + _acct_owner = account_owner or None + conn = None try: await _emit_progress(progress_cb, "Connecting to mail…") - conn = _imap_connect(account_id) + conn = _imap_connect(account_id, owner=account_owner) from datetime import timedelta as _td since = (datetime.utcnow() - _td(days=max(1, days_back))).strftime("%d-%b-%Y") # uid_list carries real IMAP UIDs, matching the email UI/read routes. @@ -193,26 +231,27 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None # the latest visible inbox messages so Clear cache -> Run again can # actually repopulate AI reply/summary/tag caches. if not uid_list: - try: - conn.select("INBOX", readonly=True) - status, data = conn.uid("SEARCH", None, "ALL") - if status == "OK" and data and data[0]: - for u in reversed(data[0].split()[-8:]): - uid_list.append(("INBOX", u)) - logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages") - except Exception as _e: - logger.warning(f"Latest-INBOX fallback scan failed: {_e}") - # Re-select INBOX as default for downstream code + _fb_uids, conn = _latest_inbox_fallback_uids( + conn, lambda: _imap_connect(account_id, owner=account_owner) + ) + uid_list.extend(_fb_uids) + # Re-select INBOX as default for downstream code (on a clean socket even + # if the SEARCH ALL fallback above failed — see #1613). 
conn.select("INBOX", readonly=True) if not uid_list: - conn.logout() return "No recent emails" await _emit_progress(progress_cb, f"Found {len(uid_list)} recent email(s); checking cache…") _c = _sql3.connect(SCHEDULED_DB) _sum_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_summaries").fetchall()} _reply_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_ai_replies").fetchall()} - _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags").fetchall()} if (auto_tag or auto_spam) else set() + if auto_tag or auto_spam: + if account_owner: + _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner=?", (account_owner,)).fetchall()} + else: + _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner='' OR owner IS NULL").fetchall()} + else: + _tag_existing = set() _cal_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_calendar_extractions").fetchall()} if auto_cal else set() # Urgency is handled by the built-in `check_email_urgency` task. Keep # this legacy poller path disabled so users don't get two independent @@ -225,7 +264,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None # this per-iteration was making big inbox scans crawl. Used by the # urgency self-loop check below. 
try: - _self_self_addr = (_get_email_config(account_id).get("from_address") or "").strip().lower() + _self_self_addr = (_get_email_config(account_id, owner=account_owner).get("from_address") or "").strip().lower() except Exception: _self_self_addr = "" @@ -233,11 +272,10 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None if auto_spam and not spam_folder: logger.warning("Auto-spam enabled but no Junk/Spam folder detected — will classify but not move") - url, model, headers = resolve_endpoint("utility") + url, model, headers = resolve_endpoint("utility", owner=account_owner) if not url: - url, model, headers = resolve_endpoint("default") + url, model, headers = resolve_endpoint("default", owner=account_owner) if not url or not model: - conn.logout() return "No model configured" writing_style = settings.get("email_writing_style", "") @@ -355,6 +393,9 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None "temperature": 0.3, "stream": False, } + # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature. + if _restricts_temperature(model): + payload.pop("temperature", None) try: # Use to_thread so this sync HTTP call doesn't freeze # the entire event loop while the LLM thinks (240s). @@ -392,8 +433,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None await _emit_progress(progress_cb, f"Drafting reply {processed + 1}/{_max_process} · checked {examined}/{len(uid_list)}") # Background reply drafting should not make the whole app # feel busy. Keep it lightweight: no extra IMAP context - # mining here; manual AI Reply can still do that when the - # user explicitly asks for a draft on one email. + # mining here; manual AI Reply can still do that (owner-scoped) + # when the user explicitly asks for a draft on one email. 
context_snippets, _terms = [], [] sys_prompt = _EMAIL_REPLY_SYS_PROMPT_BASE if att_text: @@ -708,7 +749,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None # Send alert email immediately if critical or high if urgency in ("critical", "high"): try: - cfg = _get_email_config(account_id) + cfg = _get_email_config(account_id, owner=account_owner) to_addr = cfg["from_address"] # self-email # Deep-link to open the original email in Odysseus (if public URL is configured). @@ -716,8 +757,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None from src.settings import load_settings as _ls _pub = (_ls().get("app_public_url") or "").rstrip("/") uid_str = uid.decode() if isinstance(uid, bytes) else str(uid) - from urllib.parse import quote as _q - open_url = f"{_pub}/#email={_q(_folder, safe='')}:{uid_str}" if _pub else "" + from urllib.parse import quote as _url_q + open_url = f"{_pub}/#email={_url_q(_folder, safe='')}:{uid_str}" if _pub else "" alert_subject = f"[{urgency.upper()}] {subject}" alert_body = ( @@ -806,12 +847,15 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None "temperature": 0.1, "stream": False, } + # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature. 
+ if _restricts_temperature(model): + payload.pop("temperature", None) # to_thread keeps the event loop responsive during the LLM call resp = await asyncio.to_thread( _req.post, url, json=payload, headers=req_headers, timeout=120 ) if not resp.ok: - logger.warning(f"Auto-classify {uid.decode()} HTTP {resp.status_code}: {resp.text[:200]}") + logger.warning(f"Auto-classify {uid.decode() if isinstance(uid, bytes) else str(uid)} HTTP {resp.status_code}: {resp.text[:200]}") else: rdata = resp.json() m = (rdata.get("choices") or [{}])[0].get("message", {}) @@ -840,17 +884,17 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None moved_to = "" if is_spam and auto_spam and spam_folder: - if _imap_move(uid, spam_folder): + if _imap_move(uid, spam_folder, account_id=account_id, owner=account_owner): moved_to = spam_folder logger.info(f"Auto-spam moved uid={uid.decode()} to {spam_folder}: {spam_reason}") _c = _sql3.connect(SCHEDULED_DB) _c.execute(""" INSERT OR REPLACE INTO email_tags - (message_id, uid, folder, subject, sender, tags, spam_verdict, + (message_id, owner, uid, folder, subject, sender, tags, spam_verdict, spam_reason, moved_to, model_used, created_at) - VALUES (?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?) - """, (message_id, uid.decode(), subject, sender, + VALUES (?, ?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?) 
+ """, (message_id, account_owner or "", uid.decode(), subject, sender, json.dumps(tags), 1 if is_spam else 0, spam_reason, moved_to, model, datetime.utcnow().isoformat())) _c.commit() @@ -865,7 +909,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None logger.warning(f"Auto-process {uid} failed: {e}") continue - conn.logout() await _emit_progress(progress_cb, "Finishing…") if processed > 0: logger.info(f"Auto-processed {processed} new email(s) for summary/reply/classify") @@ -902,6 +945,12 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None except Exception as e: logger.warning(f"Auto-summarize pass error: {e}") return f"Error: {e}" + finally: + if conn: + try: + conn.logout() + except Exception: + pass async def _auto_summarize_poller(): @@ -930,8 +979,9 @@ def _scheduled_poll_once() -> dict: conn = sqlite3.connect(SCHEDULED_DB) cols = [row[1] for row in conn.execute("PRAGMA table_info(scheduled_emails)").fetchall()] kind_expr = "odysseus_kind" if "odysseus_kind" in cols else "'scheduled' AS odysseus_kind" + owner_expr = "owner" if "owner" in cols else "'' AS owner" rows = conn.execute(f""" - SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr} + SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr}, {owner_expr} FROM scheduled_emails WHERE status = 'pending' AND send_at <= ? 
""", (now_iso,)).fetchall() @@ -943,7 +993,8 @@ def _scheduled_poll_once() -> dict: attachments = json.loads(r[8] or "[]") row_account_id = r[9] if len(r) > 9 else None odysseus_kind = r[10] if len(r) > 10 else "scheduled" - cfg = _get_email_config(row_account_id) + row_owner = (r[11] if len(r) > 11 else "") or _owner_for_email_account(row_account_id) + cfg = _get_email_config(row_account_id, owner=row_owner) has_atts = bool(attachments) if has_atts: outer = MIMEMultipart("mixed") @@ -980,7 +1031,7 @@ def _scheduled_poll_once() -> dict: # Append to local Sent folder try: - with _imap() as imap: + with _imap(row_account_id, owner=row_owner) as imap: sent_folder = _detect_sent_folder(imap) imap.append(sent_folder, "\\Seen", None, outer.as_bytes()) except Exception as e: diff --git a/routes/email_routes.py b/routes/email_routes.py index 8b82aa571..e611a2978 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -17,7 +17,6 @@ import sqlite3 as _sql3 import email as email_mod import email.header import email.utils -import imaplib import smtplib import json import re @@ -40,7 +39,8 @@ from routes.email_helpers import ( _strip_think, _extract_reply, _apply_email_style_mechanics, require_owner, require_user, _assert_owns_account, _q, _attach_compose_uploads, _cleanup_compose_uploads, _load_settings, _save_settings, _get_email_config, - _send_smtp_message, + _send_smtp_message, _smtp_security_mode, + _IMAP_TIMEOUT_SECONDS, _open_imap_connection, _imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder, _extract_attachment_text, _list_attachments_from_msg, _extract_attachment_to_disk, _extract_html, _extract_text, @@ -90,6 +90,16 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st return out or [""] +def _email_tag_owner_clause(account_id: str | None, owner: str = "") -> tuple[str, list[str]]: + aliases = _email_tag_owner_aliases(account_id, owner) + placeholders = ",".join("?" 
* len(aliases)) + # In configured multi-user mode, do not treat legacy owner='' rows as + # visible to everyone. Single-user/unconfigured mode keeps legacy rows. + if owner: + return f"owner IN ({placeholders})", aliases + return f"(owner IN ({placeholders}) OR owner IS NULL)", aliases + + def _record_email_received_events(owner: str, account_id: str | None, folder: str, emails: list[dict]): """Baseline inbox messages, then fire `email_received` for new arrivals.""" if not owner or (folder or "INBOX").upper() != "INBOX" or not emails: @@ -312,6 +322,20 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128] +def _envelope_recipients(*fields: str) -> list: + """Extract bare SMTP envelope addresses from one or more To/Cc/Bcc header + strings. A naive `field.split(",")` corrupts display names that contain a + comma (e.g. `"Smith, John" `, the canonical Outlook form): + it splits into `"Smith` and `John" `, breaking delivery. + email.utils.getaddresses parses the address grammar correctly.""" + out = [] + for _name, addr in email.utils.getaddresses([f for f in fields if f]): + addr = (addr or "").strip() + if addr: + out.append(addr) + return out + + def _md_to_email_html(text: str) -> str: """Render the compose markdown body to a SAFE HTML fragment for the email's text/html part. Everything is HTML-escaped FIRST (so a pasted