diff --git a/.dockerignore b/.dockerignore index ed30dd73b..aed7e9368 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,7 @@ __pycache__/ dist/ build/ .env +.env.bak.* /data/ /logs/ .git/ diff --git a/.env.example b/.env.example index 5add859c9..5382c23c7 100644 --- a/.env.example +++ b/.env.example @@ -16,6 +16,10 @@ LLM_HOST=localhost # when started with OLLAMA_HOST=0.0.0.0:11434. # OLLAMA_BASE_URL=http://host.docker.internal:11434/v1 +# Optional LM Studio URL. In Docker, host LM Studio is reachable here +# when LM Studio is set to serve on all interfaces (0.0.0.0). +# LM_STUDIO_URL=http://host.docker.internal:1234 + # OpenAI API key (only needed if using OpenAI models). # Do not commit real keys. Keep this commented until needed. # OPENAI_API_KEY=your_openai_api_key_here @@ -23,6 +27,16 @@ LLM_HOST=localhost # Research service LLM endpoint # RESEARCH_LLM_ENDPOINT=http://localhost:8000/v1/chat/completions +# Extra CA bundle for LLM providers whose TLS chain isn't in the default +# trust store. Layered ON TOP of the system / certifi bundle — verification +# stays on for every host, the trust set just gets larger. Useful for: +# - GigaChat / Sber (Russian Trusted Root CA): without this the endpoint +# shows offline with CERTIFICATE_VERIFY_FAILED — self-signed certificate +# in certificate chain. +# - On-premise / corporate LLM gateways with an internal CA. +# Point at a PEM file containing the missing root(s). 
+# LLM_CA_BUNDLE=/etc/odysseus/ca/extra-roots.pem + # ============================================================ # Search & Web # ============================================================ @@ -42,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080 # SQLite database path (default: sqlite:///./data/app.db) # DATABASE_URL=sqlite:///./data/app.db +# ============================================================ +# Data directory +# ============================================================ +# Move everything that lives under data/ - settings, sessions, database, auth, +# cache, uploads, etc. - to another path: +# ODYSSEUS_DATA_DIR=C:\path\to\dir + # ============================================================ # Auth & Security # ============================================================ @@ -49,7 +70,9 @@ SEARXNG_INSTANCE=http://localhost:8080 # Enable authentication (default: true) # AUTH_ENABLED=true -# Host port for the Odysseus web UI in Docker Compose. +# Host bind address and port for the Odysseus web UI in Docker Compose. +# Keep APP_BIND on loopback unless you intentionally want LAN/reverse-proxy access. +# APP_BIND=127.0.0.1 # Change this if another local service already uses 7000 (macOS AirPlay often does). # APP_PORT=7000 @@ -57,6 +80,10 @@ SEARXNG_INSTANCE=http://localhost:8080 # Keep false for Docker, LAN, reverse proxy, and any shared deployment. # LOCALHOST_BYPASS=false +# Mark session cookies Secure. Set true when Odysseus is served through HTTPS +# by a trusted reverse proxy or private access gateway. +# SECURE_COOKIES=true + # Optional: pre-seed the first admin password during setup. # Do not commit a real password. 
# ODYSSEUS_ADMIN_PASSWORD=change_me_before_first_boot @@ -92,6 +119,9 @@ SEARXNG_INSTANCE=http://localhost:8080 # Default: http://{LLM_HOST}:11434/v1/embeddings (ollama) # EMBEDDING_URL=http://localhost:11434/v1/embeddings +# Embedding API key (if there's one) +# EMBEDDING_API_KEY=embedding_api_key_here + # Embedding model name (must be available at the endpoint above) # EMBEDDING_MODEL=all-minilm:l6-v2 @@ -124,6 +154,21 @@ SEARXNG_INSTANCE=http://localhost:8080 # if you intentionally want scheduled scripts to run remotely. # ODYSSEUS_SCRIPT_HOST=localhost +# Chat / agent attachment size cap in bytes (default: 10 MB). +# Raise this for local installs that need larger PDFs or text documents. +# Example: 52428800 = 50 MB. +# ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=10485760 + +# Other per-feature upload size caps in bytes. All are validated and optional; +# defaults shown. An invalid value (non-integer or < 1) fails fast at startup. +# ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=104857600 # gallery image upload (100 MB) +# ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=26214400 # gallery transform input (25 MB) +# ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=10485760 # memory import file (10 MB) +# ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=26214400 # personal document upload (25 MB) +# ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=26214400 # email compose attachment (25 MB) +# ODYSSEUS_STT_MAX_AUDIO_BYTES=26214400 # speech-to-text audio (25 MB) +# ODYSSEUS_ICS_MAX_BYTES=10485760 # calendar .ics import (10 MB) + # ============================================================ # GPU support (Docker Compose) # ============================================================ @@ -135,9 +180,12 @@ SEARXNG_INSTANCE=http://localhost:8080 # NVIDIA (requires nvidia-container-toolkit + `nvidia-ctk runtime # configure --runtime=docker` on the host): # COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml +# COMPOSE_FILE=docker-compose.yml;docker/gpu.nvidia.yml #(Windows) # -# AMD ROCm (requires ROCm drivers on the host): +# AMD 
ROCm (requires ROCm drivers on the host and the GID of the render group): # COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml +# Find the render GID with: getent group render | cut -d: -f3 +# RENDER_GID=989 # # These overlays only expose the GPU devices. The slim Odysseus image # still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM, diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..64f2d7dcf --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,114 @@ +name: Bug Report +description: Report a reproducible bug in Odysseus. +labels: ["bug"] + +body: + - type: markdown + attributes: + value: | + **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) + and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first. + Duplicate reports slow things down. + + For security vulnerabilities, **do not open a public issue** — + use [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) + and read [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md) first. + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + options: + - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and did not find an existing report of this bug. + required: true + - label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).) 
+ required: true + - label: I am running the latest code from the `dev` branch (the default branch you get on clone, where fixes land first) and the bug still reproduces there. Please `git pull` the latest `dev` before filing. + required: true + + - type: dropdown + id: install-method + attributes: + label: Install Method + options: + - "-- Please Select --" + - Docker (docker compose up) + - Manual Python install (pip / venv) + - Windows native (launch-windows.ps1) + - macOS app (build-macos-app.sh / start-macos.sh) + - Other (describe in the reproduction steps below) + validations: + required: true + + - type: dropdown + id: os + attributes: + label: Operating System + options: + - "-- Please Select --" + - Linux + - macOS + - Windows + - Other + validations: + required: true + + - type: textarea + id: steps + attributes: + label: Steps to Reproduce + description: Exact steps that reliably trigger the bug. The more specific, the faster this gets fixed. + placeholder: | + 1. Go to ... + 2. Click / type ... + 3. Observe ... + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected Behaviour + description: What should have happened? + validations: + required: true + + - type: textarea + id: actual + attributes: + label: Actual Behaviour + description: What actually happened? Include the full error message if there is one. + validations: + required: true + + - type: textarea + id: logs + attributes: + label: Logs / Screenshots + description: Paste relevant terminal output or attach screenshots. Remove API keys, passwords, and personal data before pasting. + render: text + + - type: input + id: model-backend + attributes: + label: Model / Backend (if relevant) + description: "e.g. Ollama + llama3.2:latest, vLLM + mistral-7b, OpenAI API, Anthropic API" + placeholder: "Ollama + llama3.2:latest" + + - type: dropdown + id: willing_to_fix + attributes: + label: Are you willing to submit a fix? 
+ options: + - "-- Please Select --" + - "Yes — I can open a PR" + - "Partially — I can help but need guidance" + - "No — I am only filing the report" + validations: + required: true + + - type: textarea + id: additional-info + attributes: + label: Additional Information + description: Anything else that might help — browser console errors, related issues, things you already tried, or environment quirks. diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..da163954f --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,13 @@ +blank_issues_enabled: false +contact_links: + - name: Question / Need Help + url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/q-a + about: Ask how-to questions, setup help, and model configuration questions here. Issues are for confirmed bugs and concrete proposals only. + + - name: Idea or Suggestion + url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas + about: Discuss ideas and gauge interest before opening a formal feature request. If there is already a discussion, link it in your feature request. + + - name: Security Vulnerability + url: https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new + about: Report vulnerabilities privately via GitHub Security Advisories — never as a public issue. Read SECURITY.md before reporting. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 000000000..2444177ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,92 @@ +name: Feature Request +description: Propose a new feature or a concrete improvement to Odysseus. 
+labels: ["enhancement"] + +body: + - type: markdown + attributes: + value: | + **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) + and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first. + Feature requests that duplicate [ROADMAP.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/ROADMAP.md) + or an existing open issue will be closed as duplicates. + + If your idea needs community input before it becomes a concrete proposal, + start a [discussion](https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas) instead. + + - type: checkboxes + id: prerequisites + attributes: + label: Prerequisites + options: + - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and this has not already been proposed. + required: true + - label: I searched [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and this is not already being debated there. + required: true + - label: This is a concrete, actionable proposal — not a vague "it would be nice if..." request. + required: true + + - type: dropdown + id: area + attributes: + label: Area + description: Which part of the application does this affect? + options: + - "-- Please Select --" + - Chat / Agent + - Email + - Calendar + - Documents / RAG + - Memory + - Cookbook / Local Models / GPU + - Search + - Notes / Editor + - Auth / Security + - Docker / Deployment + - UI / Frontend + - API / Backend + - MCP + - Testing / CI + - Other + validations: + required: true + + - type: textarea + id: problem + attributes: + label: Problem or Motivation + description: What problem does this solve, or what use case does it enable? Be specific — "it would be better" is not enough. + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed Solution + description: Describe the behaviour or change you want to see. 
Include API shape, UI sketch, or code snippets if that helps make it concrete. + validations: + required: true + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: What other approaches did you consider and why did you rule them out? If there is an existing workaround, describe it. + + - type: textarea + id: prior-art + attributes: + label: Prior Art / Related Issues + description: Link any related issues, discussions, or external references that informed this proposal. + + - type: dropdown + id: willing_to_implement + attributes: + label: Are you willing to implement this? + options: + - "-- Please Select --" + - "Yes — I can open a PR" + - "Partially — I can help but need guidance" + - "No — I am only filing the request" + validations: + required: true diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 000000000..911b4b9b2 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,57 @@ +## Summary + + + +## Target branch + +- [ ] This PR targets **`dev`**, not `main`. All PRs land in `dev`; `main` is curated by the maintainer at each release. If your PR is on `main` by accident, click "Edit" on this PR and change the base. + +## Linked Issue + + + +Fixes # + +## Type of Change + +- [ ] Bug fix (non-breaking — fixes a confirmed issue) +- [ ] New feature (non-breaking — adds new behaviour) +- [ ] Breaking change (changes or removes existing behaviour) +- [ ] Refactor / cleanup (behaviour unchanged) +- [ ] Documentation only +- [ ] CI / tooling / configuration + +## Checklist + +- [ ] I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) and [open PRs](https://github.com/pewdiepie-archdaemon/odysseus/pulls) — this is not a duplicate. +- [ ] This PR targets `dev` +- [ ] My changes are limited to the scope described above — no unrelated refactors or whitespace changes mixed in. 
+- [ ] I actually ran the app (`docker compose up` or `uvicorn app:app`) and verified the change works end-to-end. Type-checks and unit tests are not enough. + +## How to Test + + + +1. +2. +3. + +## Visual / UI changes — REQUIRED if you touched anything that renders + +**Anything that changes what the UI looks like — buttons, icons, padding, colors, fonts, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — needs all of the following. PRs that change rendering without these WILL be closed.** + +- [ ] **Screenshot or short clip** of the change in the running app, attached below. Mobile screenshot too if the change affects mobile. +- [ ] **Style match**: the change uses Odysseus's existing visual language. Specifically: + - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, etc.) — do not introduce new color values, font sizes, or spacing units. + - Reuse existing button/input/card/border classes. Don't invent parallel styling. + - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text. + - Monospaced font (`Fira Code`) for primary UI text. Don't override. + - Dark theme is the default; any light-mode work must be wired through the existing theme system, not hard-coded. +- [ ] **No new component patterns.** If a similar widget already exists in the app, extend it instead of writing a parallel one. +- [ ] **I am not an LLM agent submitting a bulk PR.** If you are, please open an issue describing the problem first — bulk auto-generated PRs that don't match the project's visual style are closed on sight, even when the underlying fix is correct. 
+ +### Screenshots / clips + + + diff --git a/.github/scripts/check-issue-description.js b/.github/scripts/check-issue-description.js new file mode 100644 index 000000000..a76ca29ab --- /dev/null +++ b/.github/scripts/check-issue-description.js @@ -0,0 +1,196 @@ +// @ts-check +'use strict'; + +/** @param {{ github: import('@octokit/rest').Octokit, context: import('@actions/github').context, core: import('@actions/core') }} */ +module.exports = async ({ github, context, core }) => { + const issue = context.payload.issue; + const body = (issue.body || '').trim(); + const labels = issue.labels.map(l => l.name); + const owner = context.repo.owner; + const repo = context.repo.repo; + + const isBug = labels.includes('bug'); + const isFeature = labels.includes('enhancement'); + + // Extract a Section's text, stripping HTML comments. Matches any heading + // depth (#, ##, ###, …) so a manually-written body isn't penalised for + // using a different number of hashes than the issue form generates. + function section(heading) { + const re = new RegExp(`#+\\s+${heading}\\s*([\\s\\S]*?)(?=\\n#+\\s+|$)`, 'i'); + const m = body.match(re); + return m ? m[1].replace(/<!--[\s\S]*?-->/g, '').trim() : ''; + } + + const failures = []; + + // ── Common: body must exist ─────────────────────────────────────────────── + if (body.length < 50) { + failures.push( + '**Description** — body is empty or too short. ' + + 'Please open the issue using one of the provided templates.', + ); + } + + // An issue is one or the other — never both. Resolve to a single type so the + // validation can't run two conflicting blocks at once. + const type = isBug && isFeature ? 'conflict' : isBug ? 'bug' : isFeature ? 'feature' : 'untyped'; + + switch (type) { + case 'conflict': + failures.push('**Labels** — an issue cannot be both `bug` and `enhancement`.
Remove one label.'); + break; + + case 'bug': { + if (!section('Install Method')) { + failures.push('**Install Method** — select how you installed Odysseus'); + } + + if (!section('Operating System')) { + failures.push('**Operating System** — select your OS'); + } + + const stepsText = section('Steps to Reproduce'); + if (!stepsText || !/\d+\.|[-*]/.test(stepsText)) { + failures.push('**Steps to Reproduce** — must include at least one numbered or bulleted step'); + } + + if (section('Expected Behaviour').length < 10) { + failures.push('**Expected Behaviour** — section is empty or too short'); + } + + if (section('Actual Behaviour').length < 10) { + failures.push('**Actual Behaviour** — section is empty or too short'); + } + break; + } + + case 'feature': + if (!section('Area')) { + failures.push('**Area** — select which part of the application this affects'); + } + + if (section('Problem or Motivation').length < 20) { + failures.push( + '**Problem or Motivation** — section is empty or too short ' + + '(explain the concrete problem this solves)', + ); + } + + if (section('Proposed Solution').length < 20) { + failures.push( + '**Proposed Solution** — section is empty or too short ' + + '(describe the change you want to see)', + ); + } + + if (!section('Are you willing to implement this\\?')) { + failures.push('**Are you willing to implement this?** — select an option'); + } + break; + + // 'untyped' → only the common body-length check applies. + } + + // ── Unfilled dropdowns ──────────────────────────────────────────────────── + // #2068 added a "-- Please Select --" default to every template dropdown, so + // a contributor who never opens the dropdown submits with that literal string + // as the section value. The per-section checks above only verify presence, so + // a placeholder value passes. Scan every section and flag the ones still + // showing the placeholder, as a single comma-separated line item. 
+ const PLACEHOLDER = '-- Please Select --'; + const headingRe = /^#+\s+(.+?)\s*$/gm; + const headings = []; + let headingMatch; + while ((headingMatch = headingRe.exec(body)) !== null) { + headings.push({ + name: headingMatch[1].trim(), + headStart: headingMatch.index, + contentStart: headingMatch.index + headingMatch[0].length, + }); + } + const unfilled = []; + for (let i = 0; i < headings.length; i++) { + const end = i + 1 < headings.length ? headings[i + 1].headStart : body.length; + if (body.slice(headings[i].contentStart, end).includes(PLACEHOLDER)) { + unfilled.push(headings[i].name); + } + } + if (unfilled.length > 0) { + failures.push( + `**Unfilled dropdowns** — please choose a value; these sections still show ` + + `the \`${PLACEHOLDER}\` placeholder: ${unfilled.join(', ')}.`, + ); + } + + // ── Labels ──────────────────────────────────────────────────────────────── + // These labels are expected to already exist in the repo — managing the + // repo's label set is the maintainer's job, not this workflow's. We check a + // label exists before applying it (issues.addLabels would otherwise silently + // create a missing label) and fail soft — warn and skip — if it's absent. + async function labelExists(name) { + try { + await github.rest.issues.getLabel({ owner, repo, name }); + return true; + } catch (e) { + if (e.status === 404) return false; + throw e; + } + } + + async function addLabel(name) { + if (await labelExists(name)) { + await github.rest.issues.addLabels({ owner, repo, issue_number: issue.number, labels: [name] }); + } else { + core.warning(`Label "${name}" does not exist in the repo — skipping. 
Create it once to enable labelling.`); + } + } + + async function dropLabel(name) { + try { + await github.rest.issues.removeLabel({ owner, repo, issue_number: issue.number, name }); + } catch (e) { + if (e.status !== 404 && e.status !== 410) throw e; + } + } + + // ── Find existing bot comment to update in-place ────────────────────────── + const MARKER = '<!-- issue-description-check -->'; // NOTE(review): marker text reconstructed (extraction stripped the original) — confirm against upstream + const { data: comments } = await github.rest.issues.listComments({ + owner, repo, issue_number: issue.number, + }); + const existing = comments.find(c => c.user.type === 'Bot' && c.body.includes(MARKER)); + + const LABEL_BAD = 'needs more info'; + const LABEL_GOOD = 'ready for review'; + + if (failures.length === 0) { + if (existing) { + await github.rest.issues.deleteComment({ owner, repo, comment_id: existing.id }); + } + + await dropLabel(LABEL_BAD); + await addLabel(LABEL_GOOD); + + } else { + const list = failures.map(f => `- ${f}`).join('\n'); + const commentBody = [ + MARKER, + '⚠️ **Issue description is incomplete.** Please update the following sections:', + '', + list, + '', + '_This comment is deleted automatically once all sections are complete._', + ].join('\n'); + + if (existing) { + await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body: commentBody }); + } else { + await github.rest.issues.createComment({ owner, repo, issue_number: issue.number, body: commentBody }); + } + + await dropLabel(LABEL_GOOD); + await addLabel(LABEL_BAD); + + core.setFailed(`Issue description has ${failures.length} issue(s) — see bot comment for details.`); + } +}; diff --git a/.github/scripts/check-pr-description.js b/.github/scripts/check-pr-description.js new file mode 100644 index 000000000..f5dabea5d --- /dev/null +++ b/.github/scripts/check-pr-description.js @@ -0,0 +1,130 @@ +// @ts-check +'use strict'; + +/** @param {{ github: import('@octokit/rest').Octokit, context: import('@actions/github').context, core: import('@actions/core') }} */ +module.exports = async ({ github,
context, core }) => { + const body = context.payload.pull_request.body || ''; + const prNum = context.payload.pull_request.number; + const MARKER = '<!-- pr-description-check -->'; // NOTE(review): marker text reconstructed (extraction stripped the original) — confirm against upstream + const owner = context.repo.owner; + const repo = context.repo.repo; + + // Strip HTML comments so placeholder text does not count as content. + function strip(text) { + return (text ?? '').replace(/<!--[\s\S]*?-->/g, '').trim(); + } + + // Extract the text content of a Section. Matches any heading depth (#, ##, + // ###, …) so the check doesn't break if the template's heading level changes. + function section(heading) { + const m = body.match(new RegExp(`#+\\s+${heading}[\\s\\S]*?(?=\\n#+\\s+|$)`, 'i')); + return strip(m?.[0].replace(new RegExp(`#+\\s+${heading}`, 'i'), '') ?? ''); + } + + const problems = []; + + // 1. Summary must be filled in. + if (section('Summary').length < 20) { + problems.push('**Summary** is empty or too short — describe what changed and why.'); + } + + // 2. Linked Issue must reference a real issue. Accept a bare #NNN, a closing + // keyword + #NNN, or a full issue URL (e.g. .../issues/123) — the strict + // keyword-prefixed form previously false-flagged correctly-linked PRs. + const linkedSection = section('Linked Issue'); + const hasIssueRef = /#\d+\b/.test(linkedSection) || /\/issues\/\d+/.test(linkedSection); + if (!linkedSection || !hasIssueRef) { + problems.push('**Linked Issue** — add a reference like `Fixes #NNN`, a bare `#NNN`, or a link to the issue.'); + } + + // 3. At least one Type of Change box must be checked. + const typeBlock = body.match(/##\s+Type of Change[\s\S]*?(?=\n##\s|$)/i)?.[0] ?? ''; + if (!/- \[x\]/i.test(typeBlock)) { + problems.push('**Type of Change** — check at least one box.'); + } + + // 4. Duplicate-search checklist item must be checked. + if (!/- \[x\] I searched/i.test(body)) { + problems.push('**Checklist** — check the duplicate-search box to confirm you searched existing issues and PRs.'); + } + + // 5.
How to Test must contain enough real detail for a reviewer to act on. + // Any format is fine — numbered steps, prose, the commands you ran, or a + // code block — so we only require non-trivial content, not a specific shape. + const howTo = section('How to Test'); + if (howTo.length < 30) { + problems.push('**How to Test** — explain how a reviewer can verify this change. Numbered steps, the commands you ran, or a short code block all work — give a sentence or two of real detail (not just "tested locally").'); + } + + // ── Comment ────────────────────────────────────────────────────────────── + const comments = await github.paginate(github.rest.issues.listComments, { + owner, repo, issue_number: prNum, per_page: 100, + }); + const existing = comments.find(c => (c.body ?? '').includes(MARKER)); + + if (problems.length === 0) { + if (existing) { + await github.rest.issues.deleteComment({ owner, repo, comment_id: existing.id }); + } + } else { + const commentBody = [ + MARKER, + '⚠️ **PR description — action needed**', + '', + 'The following required sections are missing or incomplete. Please update the PR description to address them:', + '', + problems.map(p => `- ${p}`).join('\n'), + '', + '---', + '_This comment is deleted automatically once all sections are complete._', + ].join('\n'); + + if (existing) { + await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body: commentBody }); + } else { + await github.rest.issues.createComment({ owner, repo, issue_number: prNum, body: commentBody }); + } + } + + // ── Labels ──────────────────────────────────────────────────────────────── + // These labels are expected to already exist in the repo — managing the + // repo's label set is the maintainer's job, not this workflow's. We check a + // label exists before applying it (issues.addLabels would otherwise silently + // create a missing label) and fail soft — warn and skip — if it's absent. 
+ async function labelExists(name) { + try { + await github.rest.issues.getLabel({ owner, repo, name }); + return true; + } catch (e) { + if (e.status === 404) return false; + throw e; + } + } + + async function swapLabel(num, add, remove) { + if (await labelExists(add)) { + try { + await github.rest.issues.addLabels({ owner, repo, issue_number: num, labels: [add] }); + } catch (e) { + // Fail soft on a token that can't write labels so a label permission + // problem never masks the actual description verdict. + if (e.status !== 403) throw e; + core.warning(`Could not add "${add}" — token lacks label write here; skipping.`); + } + } else { + core.warning(`Label "${add}" does not exist in the repo — skipping. Create it once to enable labelling.`); + } + try { + await github.rest.issues.removeLabel({ owner, repo, issue_number: num, name: remove }); + } catch (e) { + if (e.status !== 404 && e.status !== 410 && e.status !== 403) throw e; + } + } + + if (problems.length === 0) { + await swapLabel(prNum, 'ready for review', 'needs work'); + } else { + await swapLabel(prNum, 'needs work', 'ready for review'); + core.setFailed(`PR description has ${problems.length} issue(s) — see bot comment for details.`); + } +}; diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 000000000..818495d14 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,94 @@ +name: CI + +on: + push: + branches: [main] + pull_request: + +# Least privilege: none of the jobs write to the repo. +permissions: + contents: read + +# Cancel superseded runs on the same ref to save Actions minutes. 
+concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +jobs: + python-syntax: + name: Python syntax (compileall) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.11" + # Byte-compile sources — catches syntax errors without installing deps. + - run: python -m compileall -q app.py core routes src services scripts tests + + node-syntax: + name: JS syntax (node --check) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false + - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 + with: + node-version: "20" + # Syntax-check our own JS (skip vendored libs in static/lib). + - name: node --check + run: | + shopt -s globstar nullglob + for f in static/app.js static/js/**/*.js; do + node --check "$f" + done + + python-tests: + name: Python tests (pytest) + runs-on: ubuntu-latest + # Informational for now: the suite has known flaky / environment-dependent + # failures (test isolation + embedding-model assertions). Tracked under the + # ROADMAP "fresh install smoke tests" item; make this required once green. + continue-on-error: true + steps: + - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 + persist-credentials: false + + # Detect whether this PR only touches documentation files. + # If so, skip the expensive pytest run while still reporting a passing check. 
+ - name: Check for docs-only changes + id: docs-check + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ github.event.pull_request.head.sha }}" + else + BASE="${{ github.event.before }}" + HEAD="${{ github.sha }}" + fi + # List all changed files; if every file matches docs/markdown patterns, skip pytest. + changed=$(git diff --name-only "$BASE" "$HEAD" 2>/dev/null || git diff --name-only HEAD~1 HEAD) + non_docs=$(echo "$changed" | grep -Ev '^(docs/|.*\.md$|\.github/[^/]+\.md$)' || true) + if [ -z "$non_docs" ]; then + echo "docs_only=true" >> "$GITHUB_OUTPUT" + echo "Docs-only change detected — skipping pytest." + else + echo "docs_only=false" >> "$GITHUB_OUTPUT" + fi + + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + if: steps.docs-check.outputs.docs_only != 'true' + with: + python-version: "3.11" + cache: pip + - run: pip install -r requirements.txt + if: steps.docs-check.outputs.docs_only != 'true' + - run: mkdir -p data # sqlite DB lives at ./data/app.db + if: steps.docs-check.outputs.docs_only != 'true' + - run: python -m pytest -q + if: steps.docs-check.outputs.docs_only != 'true' diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 000000000..5e822ab07 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,140 @@ +name: ci / docker publish + +# Build the Odysseus image and publish to GHCR. +# push to main -> :latest, :X.Y.Z (curated release; main is fast-forwarded at releases) +# push to dev -> :dev, :X.Y.Z-dev.<short-sha> (rolling dev + an immutable, traceable pin) +# Multi-arch (linux/amd64 + linux/arm64): each arch builds on its own native +# runner and pushes by digest, then a merge job stitches the digests into one +# manifest list and applies the tags (faster + cleaner than QEMU emulation). +# Registry: ghcr.io/<owner>/<repo>.
+ +on: + push: + branches: [dev, main] + paths-ignore: + - '**.md' + - 'docs/**' + - '.github/ISSUE_TEMPLATE/**' + +concurrency: + group: docker-publish-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build: + name: build (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + arch: amd64 + runner: ubuntu-latest + - platform: linux/arm64 + arch: arm64 + runner: ubuntu-24.04-arm + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push by digest + id: build + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 + with: + context: . 
+ platforms: ${{ matrix.platform }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=gha,scope=${{ matrix.arch }} + cache-to: type=gha,mode=max,scope=${{ matrix.arch }} + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - name: Upload digest + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: digest-${{ matrix.arch }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + name: merge manifest + tag + runs-on: ubuntu-latest + needs: build + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Read APP_VERSION + short sha + id: ver + run: | + v=$(grep -E '^APP_VERSION' src/constants.py | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + [ -n "$v" ] || { echo "APP_VERSION not found"; exit 1; } + echo "version=$v" >> "$GITHUB_OUTPUT" + echo "short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + - name: Download digests + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: /tmp/digests + pattern: digest-* + merge-multiple: true + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Compute tags + id: meta + uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + type=raw,value=${{ steps.ver.outputs.version }},enable=${{ 
github.ref == 'refs/heads/main' }} + type=raw,value=dev,enable=${{ github.ref == 'refs/heads/dev' }} + type=raw,value=${{ steps.ver.outputs.version }}-dev.${{ steps.ver.outputs.short }},enable=${{ github.ref == 'refs/heads/dev' }} + - name: Create manifest list + push tags + working-directory: /tmp/digests + run: | + tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") + digests=$(printf "${REGISTRY}/${IMAGE_NAME}@sha256:%s " *) + # word-splitting is intended: $tags and $digests each expand to multiple args + # shellcheck disable=SC2086 + docker buildx imagetools create $tags $digests + env: + REGISTRY: ${{ env.REGISTRY }} + IMAGE_NAME: ${{ env.IMAGE_NAME }} + - name: Inspect + run: | + if [ "$GITHUB_REF" = "refs/heads/main" ]; then ref=latest; else ref=dev; fi + docker buildx imagetools inspect "${REGISTRY}/${IMAGE_NAME}:${ref}" + env: + REGISTRY: ${{ env.REGISTRY }} + IMAGE_NAME: ${{ env.IMAGE_NAME }} diff --git a/.github/workflows/issue-description-check.yml b/.github/workflows/issue-description-check.yml new file mode 100644 index 000000000..3d0cf094e --- /dev/null +++ b/.github/workflows/issue-description-check.yml @@ -0,0 +1,24 @@ +name: ci / issue description check + +on: + issues: + types: [opened, edited, reopened] + +permissions: + issues: write + +jobs: + check: + name: Check issue description + runs-on: ubuntu-latest + # Skip bots (Dependabot, release-drafter, etc.) 
+ if: ${{ github.event.issue.user.type != 'Bot' }} + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + sparse-checkout: .github/scripts + persist-credentials: false + + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: return require('./.github/scripts/check-issue-description.js')({github, context, core}) diff --git a/.github/workflows/pr-description-check.yml b/.github/workflows/pr-description-check.yml new file mode 100644 index 000000000..c8fbe4b0f --- /dev/null +++ b/.github/workflows/pr-description-check.yml @@ -0,0 +1,109 @@ +name: ci / PR checks + +on: + # pull_request_target runs in the base-repo context (has secrets) so the check + # works on fork PRs. Safe here: the checkout pins to the base branch (no fork + # code runs) and the scripts only read context.payload and call the GitHub API. + pull_request_target: # zizmor: ignore[dangerous-triggers] + types: [opened, edited, synchronize, reopened, ready_for_review] + +# Default-deny at the workflow level; each job opts into only the scopes it needs. +# Note: modifying a PR's labels/comments needs pull-requests:write even though the +# REST path is under /issues/{n}/...; issues:write alone returns 403 on PRs. +permissions: {} + +jobs: + check-description: + name: Check PR description + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: write + issues: write + # Skip bots: they open PRs programmatically and have their own process. 
+ if: github.event.pull_request.user.type != 'Bot' + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + ref: ${{ github.base_ref }} + sparse-checkout: .github/scripts + persist-credentials: false + + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: return require('./.github/scripts/check-pr-description.js')({github, context, core}) + + check-title: + name: Check PR title (Conventional Commits) + runs-on: ubuntu-latest + permissions: {} + # Skip bots: they open PRs programmatically and have their own process. + if: github.event.pull_request.user.type != 'Bot' + steps: + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const title = context.payload.pull_request.title || ""; + // Conventional Commits: type(optional-scope)(optional !): summary + const re = /^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([\w .\/-]+\))?!?: .+/; + if (!re.test(title)) { + core.setFailed( + `PR title is not in Conventional Commits format:\n "${title}"\n\n` + + `Expected: type(scope): summary\n` + + `Example: fix(search): handle empty query\n` + + `Types: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert.` + ); + } else { + core.info(`PR title OK: ${title}`); + } + + check-mergeable: + name: Flag unmergeable PRs + runs-on: ubuntu-latest + permissions: + pull-requests: write + issues: write + # Skip bots: they open PRs programmatically and have their own process. + if: github.event.pull_request.user.type != 'Bot' + steps: + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const repo = { owner: context.repo.owner, repo: context.repo.repo }; + const number = context.payload.pull_request.number; + const READY = "ready for review"; + const CONFLICT = "merge conflict"; + + // Ensure the conflict label exists (red). Ignore if already present. 
+ try { + await github.rest.issues.getLabel({ ...repo, name: CONFLICT }); + } catch { + await github.rest.issues.createLabel({ + ...repo, name: CONFLICT, color: "B60205", + description: "Conflicts with the base branch; needs a rebase before review.", + }).catch(() => {}); + } + + // mergeable is computed asynchronously and is often null right after + // an event, so poll a few times until GitHub has resolved it. + let pr = null; + for (let i = 0; i < 5; i++) { + const { data } = await github.rest.pulls.get({ ...repo, pull_number: number }); + if (data.mergeable !== null) { pr = data; break; } + await new Promise(r => setTimeout(r, 3000)); + } + if (!pr || pr.draft) return; + const labels = pr.labels.map(l => l.name); + + if (pr.mergeable === false) { + if (labels.includes(READY)) { + await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: READY }).catch(() => {}); + } + if (!labels.includes(CONFLICT)) { + await github.rest.issues.addLabels({ ...repo, issue_number: number, labels: [CONFLICT] }); + } + } else if (pr.mergeable === true) { + if (labels.includes(CONFLICT)) { + await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: CONFLICT }).catch(() => {}); + } + } diff --git a/.gitignore b/.gitignore index 8ec11ab19..c48f6cd61 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ venv/ # Environment .env +.env.bak.* !.env.example # Data — all user data stays local @@ -66,6 +67,11 @@ output.txt.txt !docs/*.png !docs/*.gif !docs/*.webp +# …and curated docs/ subfolder assets (e.g. accessibility before/after shots). +!docs/**/*.png +!docs/**/*.jpg +!docs/**/*.gif +!docs/**/*.webp # Reports and temp files reports/ diff --git a/ACKNOWLEDGMENTS.md b/ACKNOWLEDGMENTS.md index c4079e6e5..fdf55c48a 100644 --- a/ACKNOWLEDGMENTS.md +++ b/ACKNOWLEDGMENTS.md @@ -33,8 +33,8 @@ The full license texts are kept in [`licenses/`](licenses/). 
- **[Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)** by **Alibaba-NLP / Tongyi Lab** — the multi-step deep-research agent pipeline. Copyright © Alibaba-NLP / Tongyi Lab. **Apache-2.0.** Adapted for Odysseus's - Deep Research feature (`api/research_*.py`, `routes/research_routes.py`, - `services/search/`). Full text in + Deep Research feature (`services/research/`, `src/research_handler.py`, + `routes/research_routes.py`, `services/search/`). Full text in [`licenses/DeepResearch-Apache-2.0.txt`](licenses/DeepResearch-Apache-2.0.txt). --- @@ -47,7 +47,7 @@ just composed. | Service | Image | Purpose | License | |---|---|---|---| -| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:latest` | Default metasearch backend | AGPL-3.0 | +| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:2026.5.31-7159b8aed` (pinned tag; see compose) | Default metasearch backend | AGPL-3.0 | | [ChromaDB](https://github.com/chroma-core/chroma) | `chromadb/chroma:latest` | Vector store for memory / RAG | Apache-2.0 | | [ntfy](https://github.com/binwiederhier/ntfy) | `binwiederhier/ntfy` | Push notifications (self-hosted reminders) | Apache-2.0 / GPL-2.0 | @@ -118,6 +118,7 @@ Core (`requirements.txt`) and optional (`requirements-optional.txt`): | croniter | MIT | | pytest / pytest-asyncio | MIT / Apache-2.0 | | duckduckgo-search (optional) | MIT | +| markitdown (optional — Office/EPUB text extraction) | MIT | | **PyMuPDF** *(optional — form-filling only)* | **AGPL-3.0** — see note below | ## Companion services (interoperated with, not bundled) @@ -152,6 +153,9 @@ concerns from earlier are resolved: deployment (Artifex also sells a commercial PyMuPDF license that lifts this). - **`caldav`** (Python lib) is **dual-licensed GPL-3.0-or-later OR Apache-2.0**. Odysseus uses it under **Apache-2.0**, which is permissive and MIT-compatible. 
+- **`markitdown`** (Microsoft) is **MIT** and used only as an *optional* dependency for Office/EPUB text + extraction (`src/markitdown_runtime.py`), lazy-imported with graceful fallback — the MIT core runs without + it. The cloud `az-doc-intel` extra is deliberately **not** installed, keeping extraction fully local. --- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 01ed77b71..174a4f2f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,6 +2,17 @@ Thanks for helping. The project is moving quickly, so the best contributions are focused, easy to review, and easy to test. +## Branch model + +Odysseus has two branches: + +- **`dev`** — where all PRs land. Things can be in flux here; the merge button gets used freely. +- **`main`** — what users run. Curated and tested by the maintainer. Fast-forwarded to a stable `dev` commit at each release. + +**Open your PR against `dev`, not `main`.** The GitHub "base" dropdown defaults to `dev`. If you opened a PR against `main` by accident, click "Edit" on the PR and change the base — no rebase needed. + +End-users cloning the repo will land on `dev` by default. To run the curated/stable version: `git checkout main` after clone. + ## Before You Start - Search existing issues and pull requests before opening a new one. @@ -57,12 +68,44 @@ Good pull requests usually include: - A short explanation of the bug or feature. - The files or areas changed. -- Manual test steps or automated test results. +- Manual test steps or automated test results from running the actual app, not just the test suite. - Screenshots or short recordings for UI changes. - Links to related issues, for example `Fixes #123`. Please keep PRs small. Large PRs that mix unrelated cleanup, formatting, refactors, and behavior changes are much harder to review. +> **Auto-generated PRs.** If you are running an LLM agent (Devin, Cursor, OpenHands, Claude Code, etc.) 
against this repo: please open an issue describing the problem first instead of opening a PR directly. Bulk agent-generated PRs that don't match the project's visual style or contribution format will be closed without review, even when the underlying fix is correct. + +## Style and visual changes + +Odysseus has an intentional visual style. PRs that ignore it will be closed without merge, no matter how correct the underlying code is. + +Before submitting any change that affects what the app looks like — buttons, icons, fonts, colors, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — please: + +1. **Run the app locally** and view the change in a browser. Type-checks and unit tests are not enough. +2. **Attach a screenshot or short clip** of the change in the running app. Add a mobile screenshot too if the change affects mobile. +3. **Match the existing visual language.** Specifically: + - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, …). Do not introduce new color values, font sizes, or spacing units. + - Reuse existing button, input, card, and border classes. Don't invent parallel styling for similar widgets. + - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text. + - Monospaced font (`Fira Code`) for primary UI text. Don't override. + - Dark theme is the default; any light-mode work goes through the existing theme system, not hard-coded. +4. **Don't add parallel components.** If a similar widget already exists in the app, extend it instead of writing a new one. + +If you are unsure whether a change is "visual," it is. Default to attaching a screenshot. + +## Code conventions + +Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed. 
+ +- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import. +- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`). +- **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal. + +If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files. + +**Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious. 
+ ## Issue Reports For bugs, include: diff --git a/Dockerfile b/Dockerfile index 535f0a0d4..ad273cec4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,9 +22,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ WORKDIR /app -# Install Python deps first (layer cache) -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt +# Install Python deps first (layer cache). Optional extras (PyMuPDF AGPL, etc.) +# are opt-in so the default image stays MIT-core; see requirements-optional.txt. +ARG INSTALL_OPTIONAL=false +COPY requirements.txt requirements-optional.txt ./ +RUN pip install --no-cache-dir -r requirements.txt \ + && if [ "$INSTALL_OPTIONAL" = "true" ]; then pip install --no-cache-dir -r requirements-optional.txt; fi # Copy app code COPY . . diff --git a/README.md b/README.md index 2f2da5b6e..4fae1d76b 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,19 @@ # Odysseus + +> **Branch note:** `dev` is the default branch and contains the latest development changes, but it may be unstable. For the more stable curated branch, use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main). + +``` ─────────────────────────────────────────────── ⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ Odysseus vers. 1.0 ─────────────────────────────────────────────── +``` ![Odysseus](docs/odysseus.jpg) A self-hosted AI workspace -- meant to be the self-hosted version of the UI experience you get from ChatGPT and Claude. But with more jank and fun. Running on your own hardware, with your own data -- local-first, privacy-first, and no trojan. ## Features - - **Chat** -- chat with any local model or API; adding them is super simple.
 vLLM · llama.cpp · Ollama · OpenRouter · OpenAI + - **Chat** -- chat with any local model or API; adding them is super simple.
 vLLM · llama.cpp · Ollama · OpenRouter · OpenAI · GitHub Copilot - **Agent** -- hand it tools and let it run the whole task itself.
 built on [opencode](https://github.com/anomalyco/opencode) · MCP · web · files · shell · skills · memory - **Cookbook** -- Scans your hardware, recommends models, click to download and serve.. easy!
 built on [llmfit](https://github.com/AlexsJones/llmfit) · VRAM-aware · GGUF / FP8 / AWQ · fit scoring · vLLM / llama.cpp serving - **Deep Research** -- multi-step runs that gather, read, and synthesize sources into a nice visual report.
 adapted from [Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch) @@ -44,7 +49,7 @@ A full, hover-to-play tour lives on the landing page (`docs/index.html`). Defaults work out of the box: clone, run, then configure models/search/email inside **Settings**. Only edit `.env` for deployment-level overrides like -`APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password. +`APP_BIND`, `APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password. On first setup, Odysseus creates an admin account (`admin` unless `ODYSSEUS_ADMIN_USER` is set) and prints a temporary password in the terminal. @@ -61,8 +66,12 @@ cd odysseus cp .env.example .env # optional, but recommended for explicit defaults docker compose up -d --build ``` -Open `http://localhost:7000` when the containers are healthy. If the port is -taken, set `APP_PORT=7001` in `.env` and recreate the container. +To include optional extras in the image (PDF viewer, Office extraction; includes AGPL PyMuPDF), build with `docker compose build --build-arg INSTALL_OPTIONAL=true` before `up`. + +Open `http://localhost:7000` when the containers are healthy. Docker Compose +binds the web UI to `127.0.0.1` by default. If the port is taken, set +`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0` +only when you intentionally want LAN/reverse-proxy access. ### Native Linux / macOS ```bash @@ -72,10 +81,12 @@ python3 -m venv venv source venv/bin/activate pip install -r requirements.txt python setup.py -python -m uvicorn app:app --host 0.0.0.0 --port 7000 +python -m uvicorn app:app --host 127.0.0.1 --port 7000 ``` Requirements: Python 3.11+. Cookbook also needs `tmux` for background model -downloads and serves. +downloads and serves. The app itself is lightweight; local model serving is the +heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can +connect to API or remote model servers instead. 
Use `--host 0.0.0.0` only when you intentionally want LAN/reverse-proxy access. ### Apple Silicon Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an @@ -87,7 +98,18 @@ cd odysseus ./start-macos.sh ``` -It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper: +It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces: + +```bash +ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh +# then open http://YOUR_MAC_IP:7860 +``` + +The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT` +set there are picked up automatically without a command-line override each run. + +Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not +expose this port directly to the public internet. To build a clickable app wrapper: ```bash ./build-macos-app.sh @@ -97,9 +119,9 @@ It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper: Cookbook, GPU, Ollama, and troubleshooting notes **Docker bundled services.** Compose starts Odysseus, ChromaDB, SearXNG, and -ntfy. ChromaDB/SearXNG/ntfy bind host ports to `127.0.0.1` by default, so they -are reachable from the host but not exposed to your LAN/public internet unless -you opt in. +ntfy. Odysseus and the bundled service ports bind to `127.0.0.1` by default, so +they are reachable from the host but not exposed to your LAN/public internet +unless you opt in. **Cookbook storage in Docker.** Downloads live in `./data/huggingface` (`~/.cache/huggingface` in the container). Cookbook-installed Python CLIs and @@ -114,21 +136,96 @@ Odysseus SSH key and add the public key to the remote server's ssh-copy-id -i data/ssh/id_ed25519.pub user@server ``` -**NVIDIA / AMD Docker GPU overlays.** Install the host runtime first, then add -one of these to `.env`: +**Docker GPU overlays.** CPU-only users can skip this section. 
Cookbook can +only detect GPUs that Docker exposes to the container — if the host runtime or +device passthrough is not configured, Cookbook sees the iGPU, another card, or +CPU instead of your intended GPU. + +For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can +optionally install the host runtime or update `.env`. + +```bash +# Read-only diagnostic (default — installs nothing, never edits .env): +scripts/check-docker-gpu.sh + +# Print OS-specific install commands without running them: +scripts/check-docker-gpu.sh --print-install-commands + +# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo): +scripts/check-docker-gpu.sh --install-nvidia-toolkit + +# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working): +scripts/check-docker-gpu.sh --enable-nvidia-overlay + +# Full assisted setup — install toolkit, then enable overlay if passthrough works: +scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay +``` + +Safety notes: +- The app never installs host GPU runtime automatically. +- The app never edits `.env` automatically. +- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed, + and only after GPU passthrough succeeds. `--yes` skips prompts but does not + bypass the passthrough gate. +- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by + Git and the Docker build context. + +To enable manually without the script, add this to `.env`: ```bash COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml -COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml ``` -Verify with: +**AMD / ROCm.** AMD setup is a read-only diagnostic plus a manual `.env` edit. 
Run: ```bash -docker compose exec odysseus nvidia-smi -L -docker compose exec odysseus rocm-smi +scripts/check-docker-amd-gpu.sh ``` +Then add the reported values to `.env`, replacing `RENDER_GID` with your host's +numeric render group id: + +```bash +COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml +RENDER_GID=989 +``` + +For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: docker/gpu.nvidia.yml or docker/gpu.amd.yml. + +**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools +often accept only a single Compose file and do not reliably honor `COMPOSE_FILE` +or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE` +overlay workflow above. For stack UIs, point the stack at one of the standalone +files instead, which bundle the base stack plus the GPU settings: + +- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit + on the host. +- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the + `video`/`render` group membership, and `RENDER_GID` when needed. + +The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the +source of truth; the standalone files mirror them for single-file deployments. + +Verify after enabling either overlay: + +```bash +docker compose exec odysseus nvidia-smi -L # NVIDIA +docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*' # AMD +``` + +> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the +> container confirms Docker GPU access, but llama.cpp also needs `cudart` and +> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart +> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or +> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue — +> not a Docker passthrough failure. Re-install the serve engine via +> **Cookbook → Dependencies** to get a CUDA-enabled build. 
+> +> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside +> the container confirms device passthrough, not ROCm userspace or a +> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected +> inside the slim Odysseus image. + **Ollama with Docker.** If Ollama runs on the host, add this endpoint in Settings: @@ -142,6 +239,13 @@ Ollama must listen outside its own loopback interface: OLLAMA_HOST=0.0.0.0:11434 ollama serve ``` +This connects Odysseus in Docker to an Ollama server that is already running on +your host machine; it does not start Ollama inside the container. +`host.docker.internal` is Docker's hostname for the host machine from inside the +container. Cookbook **Serve** is a separate workflow for serving downloaded +models through Odysseus/llama.cpp, so Windows users with an existing Ollama +install usually only need to add the endpoint in Settings. + **Useful checks.** ```bash @@ -173,13 +277,16 @@ Or do it by hand: ```powershell git clone https://github.com/pewdiepie-archdaemon/odysseus.git cd odysseus -python -m venv venv +py -3.11 -m venv venv venv\Scripts\Activate.ps1 pip install -r requirements.txt python setup.py python -m uvicorn app:app --host 127.0.0.1 --port 7000 ``` +If `python` points at an older interpreter, use `py -3.12` (or another installed +3.11+ version) for the venv step. + **Requirements:** Python 3.11+. The core app (chat, agent, memory, documents, email, calendar, deep research) runs fully native. For full **Cookbook** background model downloads and the agent shell tool, also install @@ -191,31 +298,83 @@ Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Window Open `http://localhost:7000`, log in with the generated admin password, and configure everything else inside **Settings**. 
+## Troubleshooting & Advanced Setup + +### `chromadb-client` conflicts with embedded ChromaDB +If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails. + +**Fix:** uninstall `chromadb-client` and force-reinstall the full package: +```bash +./venv/bin/pip uninstall chromadb-client -y +./venv/bin/pip install --force-reinstall chromadb +``` + +### HTTPS + LAN/Tailscale exposure +To expose Odysseus on a local network or Tailscale with HTTPS: +1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`). +2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert): + ```bash + mkcert -install + mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip + ``` +3. Run `uvicorn` with the generated certs: + ```bash + python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem + ``` +4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings). + +### Optional Dependencies +`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default. + +| Package | Feature unlocked | +|---------|-----------------| +| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. | +| `duckduckgo-search` | DuckDuckGo as a search provider option. | +| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) | +| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). | + +### Outlook / Office 365 email +Odysseus email accounts currently use IMAP/SMTP username-password auth. 
Outlook +and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox +passwords will fail. See [docs/email-outlook.md](docs/email-outlook.md) for the +current limitation and the planned integration direction. + ## Security Notes Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console. - Keep `AUTH_ENABLED=true` for any network-accessible deployment. -- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy. -- Keep `data/`, `.env`, logs, databases, and uploaded/generated media out of Git. They are ignored by default. +- Keep `LOCALHOST_BYPASS=false` outside local development. +- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway. +- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer. +- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default. - Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin. - Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment. - Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log. - If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones. 
- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access. +- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer. - Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged. -### Putting it behind HTTPS -Odysseus serves plain HTTP on its port. That's fine for `localhost` and trusted LAN/VPN use, but browsers will warn ("Password fields present on an insecure page") and the login + API tokens travel in cleartext. For anything reachable outside your machine — including a Tailscale IP shared with other devices — put a TLS-terminating reverse proxy in front. +### Private or proxied deployments +Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is: -Shortest path with [Caddy](https://caddyserver.com/) (auto-renews Let's Encrypt certs): +1. Keep Odysseus on localhost, for example `127.0.0.1:7000`. +2. Terminate HTTPS at a trusted reverse proxy or private access gateway. +3. Put the authenticated Odysseus web/API entrypoint behind that layer. +4. Keep raw service and model ports internal-only. -```caddy -odysseus.example.com { - reverse_proxy localhost:7000 -} -``` +Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`. -For a LAN-only Tailscale deployment, Caddy + [tailscale-cert](https://caddyserver.com/docs/caddyfile/options#auto-https) or the built-in MagicDNS HTTPS feature both work. 
nginx/Traefik configs are similar — proxy `localhost:7000`, terminate TLS at the proxy. Once that's in place, the browser warning goes away and your login is encrypted. +Common internal-only ports from the default docs/compose setup: + +| Port | Service | +|---|---| +| `7000` | Odysseus raw app port | +| `8080` | SearXNG | +| `8091` | ntfy | +| `8100` | ChromaDB host port for manual/compose access | +| `11434` | Ollama | +| `8000-8020` | Common local model/provider APIs | ## Contributing Help is welcome. The best entry points are fresh-install testing, provider setup @@ -234,12 +393,25 @@ Key settings: | `OPENAI_API_KEY` | -- | Optional OpenAI key. Prefer adding providers in the app unless pre-seeding. | | `SEARXNG_INSTANCE` | `http://localhost:8080` | SearXNG URL. Docker overrides this to `http://searxng:8080`. | | `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. | +| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. | +| `APP_PORT` | `7000` | Docker Compose host port for the web UI. | | `AUTH_ENABLED` | `true` | Enable/disable login | | `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. | +| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. | | `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string | | `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. | | `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. | | `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint | +| `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. 
| +| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). | +| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). | +| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). | +| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). | +| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). | +| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). | +| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). | + +All upload-limit vars are validated (must be a positive integer) and optional; an invalid value fails fast at startup. ### Built-in MCP servers (optional setup) diff --git a/ROADMAP.md b/ROADMAP.md index aa79c3088..7c59c1f6a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -1,6 +1,6 @@ # Roadmap / Help Wanted -Odysseus is on a voyage, but not home yet. It works great for me (lol), but this is ship is moving fast and feedback/help would be appreciated! (I dont know what I'm doing hlep). +Odysseus is on a voyage, but not home yet. It works great for me (lol), but this ship is moving fast and feedback/help would be appreciated! (I don't know what I'm doing, help). If you see weird CSS, strange layout behavior, or a suspiciously murky corner of the codebase, you are probably right to stay away. @@ -8,25 +8,60 @@ the codebase, you are probably right to stay away. ## High Priority - SQUASH BUGS -- Fresh Docker install smoke tests on Linux, macOS, and Windows!! +- Fresh install smoke tests on Linux, macOS, and Windows. Docker, native Python, + and WSL all need coverage. - Integration audit: do integrations even work? Confirm what works, what needs setup docs, and what should be removed or hidden. - Self-host troubleshooting cookbook. 
Document the weird 30-second fixes that otherwise become 30-minute searches: Dovecot cleartext auth for local stacks, ntfy Android Instant Delivery for non-ntfy.sh servers, clipboard limits on plain-HTTP Tailscale URLs, Radicale collection URLs, and similar traps. - Cookbook reliability on other computers. This is probably the area most likely to need work across different machines, GPUs, drivers, shells, and Python environments. -- Tile/window management correctness. I had to brute force my way a bit here, I'm aware, popups, dropdowns, and fixed-position UI inside transformed modals can land in the wrong place. -- Esc button, it's small but a lot of windows that arent still close on esc and alot of them doesnt. -- Skill audit, how does your model respond to skill injection, does it follow? Does its parsing miss? +- Cookbook SGLang support across platforms. Make sure SGLang setup/serve works + predictably on Linux, Windows/WSL, macOS where possible, Docker, and common + NVIDIA/AMD hardware paths. +- Deep Research model presets by hardware. Recommend approved model/parameter + profiles for small, medium, and large local setups so people with different + hardware can use Deep Research without guessing. Surface this either in Deep + Research settings or as a Cookbook scan/dropdown suggestion. +- Cookbook model scan/download ranking. Prioritize newer architectures and + better hardware-fit models instead of scoring everything almost the same. + Ranking should account for architecture age, quant format, VRAM/RAM fit, + backend support, vision/mmproj requirements, and likely serve reliability. +- Cookbook error feedback and logging. Failed downloads, dependency installs, + preflights, and serve jobs should show the actual command/output/error in the + UI, with copyable logs and clear next steps instead of just "crashed". +- Agent prompt/context bloat. 
Agent mode is too heavy for smaller local models: + tool schemas, skills, memory, documents, and instructions can eat the context + before the user request really starts. We need slimmer prompts, better tool + selection, smaller default tool sets, and clearer guidance for models with + 4k/8k/16k context windows. +- Skill/tool prompt-injection audit. User-editable skills, notes, documents, + fetched pages, and memories should be treated as untrusted data. Keep testing + whether models follow malicious instructions from those surfaces. - Better degraded-state reporting for ChromaDB, SearXNG, email, ntfy, and provider probes. +- Email performance audit. Fetching, searching, opening, deleting, and sending + email can feel slow, especially over IMAP/SMTP providers with high latency. + Need someone who knows mail performance to profile the current flow, identify + whether the bottleneck is IMAP folder select/fetch, cache invalidation, + attachment/body loading, SMTP handshakes, or frontend refresh behavior, then + propose safer caching/prefetch/batching without breaking multi-account state. - Provider setup/probing audit for Anthropic, Gemini, Groq, xAI, OpenRouter, OpenAI, and DeepSeek. ## Refactor Targets - CSS cleanup. `static/style.css` basically Calypso's island atm. - Tour core helper. The onboarding tours have too much copy-pasted scaffolding; promote a shared `tour-core.js` helper before adding more tours. +- Modal/window positioning cleanup. Some window controls have improved, but the + underlying popup/dropdown/fixed-position behavior is still too fragile. - Mobile media override discoverability. A lot of "CSS did not move" bugs are mobile `@media` overrides of the same selector; comments or linting around desktop/mobile paired rules would help. - Dead code pass for old routes, stale feature flags, and unused UI states. ## Frontend +- Expand the Editor for quicker, more robust everyday use. 
Better file/document + handling, smoother window behavior, clearer save/export flows, stronger image + editing affordances, and fewer brittle edge cases. +- Better AI integration for Notes and Todos. Notes should be easier for the + agent to read, update, summarize, and turn into actions. Todos should be + assignable to an agent from the UI, possibly through a button, task action, + or dedicated skill/tool flow. - Mobile gallery/editor polish. Easier to launch/download inpaint model or any missing pieces. - Accessibility pass: keyboard navigation, focus states, contrast, reduced motion. - Improve empty states and error messages on fresh installs. diff --git a/SECURITY.md b/SECURITY.md index 2cca34be9..1fa5b0b3b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -8,16 +8,20 @@ Security fixes are handled on the default branch until formal releases are cut. ## Deployment Guidance -- Keep `AUTH_ENABLED=true`. +- Keep `AUTH_ENABLED=true` for any network-accessible deployment. +- Keep `LOCALHOST_BYPASS=false` outside local development. +- Set `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway. - Use HTTPS when exposing the app beyond localhost. -- Put the app behind a trusted reverse proxy or private network. -- Protect `.env`, `data/`, logs, uploaded files, generated media, and database files. +- Put the authenticated Odysseus web/API entrypoint behind a trusted reverse proxy or private access layer such as Cloudflare Access, Tailscale, or a VPN. +- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. +- Protect `.env`, `data/`, `logs/`, uploads, generated media, backups, auth/session files, database files, API keys, and model/provider tokens. - Disable open signup unless you intentionally want new accounts. - Keep demo/test users non-admin, and remove them entirely on serious deployments. - Give admin accounts strong passwords and enable 2FA where possible. 
- Leave high-risk agent tools restricted to admins: shell, Python, file read/write, email send/read, MCP, app API, task/skill/memory management, settings, tokens, and model serving. - Rotate API keys, webhook secrets, and Odysseus API tokens if they appear in logs, screenshots, demos, or shared chats. - Treat shell, model-serving, MCP, email, calendar, and vault features as privileged admin functionality. +- Common internal-only ports are Odysseus `7000`, SearXNG `8080`, ntfy `8091`, ChromaDB `8100`, Ollama `11434`, and local model/provider APIs such as `8000-8020`. ## Publishing A Fork @@ -29,7 +33,7 @@ git check-ignore -v .env data/auth.json data/app.db logs/compound.log odysseus.d git grep -n -I -E "(sk-[A-Za-z0-9_-]{20,}|xox[baprs]-|AIza[0-9A-Za-z_-]{20,}|Bearer [A-Za-z0-9._~+/-]{20,})" -- . ':!static/lib/**' ':!package-lock.json' ``` -Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `data/` contents, local databases, uploaded files, generated media, logs, backups, API keys, password hashes, or personal documents. +Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `.env` values, `data/` contents, local databases, uploaded files, generated media, logs, backups, auth/session files, API keys, model/provider tokens, password hashes, or personal documents. ## Reporting diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md new file mode 100644 index 000000000..48665a61d --- /dev/null +++ b/THREAT_MODEL.md @@ -0,0 +1,81 @@ +# Threat Model + +Odysseus is a **self-hosted AI workspace with privileged local access**. This document states the trust boundary so contributors can reason about security decisions without reading through the full auth and middleware stack. + +## Trust Boundary + +Odysseus is designed for **trusted users on a private network**, not public exposure. The README describes it as "treat it like an admin console" — that framing is accurate. 
A logged-in admin can execute shell commands, read and write files, send email, and control model serving. This is intentional. The threat model does not try to prevent admins from doing these things. It does try to prevent: + +- Unauthenticated access +- Non-admins reaching admin-only capabilities +- The AI agent acting on instructions injected through untrusted content (web results, emails, fetched pages, memories) +- Internal services (ChromaDB, Ollama, SearXNG, etc.) being reachable from outside the host + +## Roles and Capabilities + +| Capability | Admin | Non-admin (default) | +|---|---|---| +| Chat with agent | ✓ | ✓ | +| Browser tool | ✓ | ✓ | +| Documents | ✓ | ✓ | +| Research mode | ✓ | ✓ | +| Image generation | ✓ | ✓ | +| Memory management | ✓ | ✓ | +| Shell / Python execution | ✓ | ✗ | +| File read / write | ✓ | ✗ | +| Email send / read | ✓ | ✗ | +| MCP tools | ✓ | ✗ | +| Calendar management | ✓ | ✗ | +| Token / webhook management | ✓ | ✗ | +| Model serving | ✓ | ✗ | +| Vault | ✓ | ✗ | +| Settings | ✓ | ✗ | + +Non-admin defaults are in `core/auth.py:DEFAULT_PRIVILEGES`. Tool enforcement is in `src/tool_security.py:NON_ADMIN_BLOCKED_TOOLS`. Any tool whose name starts with `mcp__` is also blocked for non-admins. Admins always get full access regardless of stored privilege values. + +## Authentication + +- **Sessions:** bcrypt passwords, 7-day session tokens stored atomically in `data/sessions.json` via `core/atomic_io.py`. +- **2FA:** TOTP with 8 single-use backup codes. Verified after password check, before session issuance. +- **Reserved usernames:** `internal-tool`, `api`, `demo`, `system` cannot be registered or renamed into. Defined in `core/auth.py:RESERVED_USERNAMES`. + - `internal-tool` is security-critical: `core/middleware.py:require_admin` treats any request where `request.state.current_user == "internal-tool"` as the in-process tool loopback and grants admin unconditionally. 
A real account with that name would silently pass every `require_admin` check. +- **Orphan sessions:** `validate_token` re-checks that the user record still exists on every call. A deleted user's cookie is dropped on next request rather than continuing to authenticate. + +## Internal Tool Loopback + +Agent tool calls reach admin-gated HTTP routes over an in-process HTTP loopback. The mechanism: + +1. At app startup, `core/middleware.py` generates a random `INTERNAL_TOOL_TOKEN` via `secrets.token_hex(32)`. It is never persisted and never sent to clients. +2. Loopback requests carry `X-Odysseus-Internal-Token: <token>` or have `request.state.current_user` already set to `"internal-tool"` by the auth middleware. +3. `require_admin` recognises either signal and grants access without checking the session user. + +The agent may be running in a non-admin user's session, but tool dispatch first calls `src/tool_security.py:owner_is_admin_or_single_user` to verify the session owner is an admin before issuing any loopback call. Non-admin users cannot invoke admin tools even via the agent. + +## Prompt-Injection Hardening + +External content that reaches the LLM is treated as untrusted via `src/prompt_security.py`: + +- `untrusted_context_message(label, content)` wraps the content in a `user`-role message with a header block instructing the model not to follow instructions inside it. Content goes in as data, not as a system instruction. +- `UNTRUSTED_CONTEXT_POLICY` is a system-prompt preamble that states the same policy at the top of every session where untrusted data may appear. + +**Untrusted surfaces that must go through this wrapper:** web search results, fetched URLs, emails (read), saved memories, skill text, notes, and any tool output sourced from outside the server. Injecting untrusted content directly into the system role is a security bug. 
+ +## Security Headers + +`core/middleware.py:SecurityHeadersMiddleware` sets headers on every response: + +- `X-Frame-Options: DENY` + `frame-ancestors 'none'` on all routes except tool-render iframes (which are sandboxed at the HTML level). +- `X-Content-Type-Options: nosniff` and `Referrer-Policy: no-referrer` everywhere. +- **CSP:** nonce-based `script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net`. `style-src 'unsafe-inline'` is intentionally kept — `static/index.html` ships inline ` +
+

Pair a device

+

Generate a one-time pairing code (a chat-scoped API token) for a LAN client.

+
+ +
+

Admin only. Each code mints a new token, shown once. Manage or revoke under Settings → API tokens.

+
""" + return HTMLResponse(page) + + @router.post("/pair") + def pair_create(request: Request): + """Mint a pairing code. Admin-cookie only; CSRF-safe because the + SameSite=Lax session cookie is not sent on a cross-site POST (same + protection as POST /api/tokens). Minting invalidates the token cache so + the code works immediately, no restart. `?format=json` returns the + payload for an in-app pairing screen.""" + require_admin(request) + owner = get_current_user(request) + invalidate = getattr(request.app.state, "invalidate_token_cache", None) + token_id, raw_token = mint_pairing_token(owner, invalidate) + + hosts = _pairing.lan_ip_candidates() + host = hosts[0] if hosts else "127.0.0.1" + port = request.url.port or _pairing.default_port() + payload = _pairing.pairing_payload(host, port, raw_token) + qr = _pairing.pairing_qr_png_data_uri(payload) + qr_ok = bool(qr and qr.startswith("data:image/png;base64,")) + + if (request.query_params.get("format") or "").lower() == "json": + return { + "host": host, + "port": port, + "token": raw_token, + "token_id": token_id, + "hosts": hosts, + "payload": payload, + "qr": qr if qr_ok else None, + } + + import json as _json + payload_json = _json.dumps(payload, separators=(",", ":")) + # Only ever emit a known PNG data-URI into the src; every other value is + # html.escaped. + qr_block = ( + f'Pairing QR' + if qr_ok else "

QR rendering unavailable -- enter the details manually.

" + ) + page = f""" + +Pairing code + +
+

Pairing code

+ {qr_block} +
Host: {html.escape(host)}
+
Port: {html.escape(str(port))}
+
Token: {html.escape(raw_token)}
+
Payload: {html.escape(payload_json)}
+

Shown once. This grants chat access to your Odysseus; revoke it + in Settings → API tokens (id {html.escape(token_id)}). The + device must be on the same network, and the server must bind to your LAN.

+
""" + return HTMLResponse(page) + + return router diff --git a/core/auth.py b/core/auth.py index 4d355542e..5db2fed4c 100644 --- a/core/auth.py +++ b/core/auth.py @@ -30,16 +30,42 @@ DEFAULT_PRIVILEGES = { "can_manage_memory": True, "max_messages_per_day": 0, "allowed_models": [], + "allowed_models_restricted": False, + # Explicit "block every model" sentinel. An empty `allowed_models` list is + # ambiguous — it's also what gets sent when the admin clicks "[All]" — so + # we need a dedicated flag to express "this user may use no models at all" + # distinctly from "this user has no restriction". + "block_all_models": False, } # Admins get everything ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()} +ADMIN_PRIVILEGES["allowed_models_restricted"] = False +# Admins must never be blocked from using models — the generic dict +# comprehension above flips every boolean default to True, which would be +# backwards for this sentinel. +ADMIN_PRIVILEGES["block_all_models"] = False -DEFAULT_AUTH_PATH = os.path.join( - Path(__file__).parent.parent, "data", "auth.json" -) +from src.constants import AUTH_FILE +DEFAULT_AUTH_PATH = AUTH_FILE TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days +# Usernames the auth + middleware layer reserve as internal "synthetic owner" +# sentinels; they must never belong to a real account. The most dangerous is +# "internal-tool": `core.middleware.require_admin` treats any request whose +# `current_user == "internal-tool"` as the in-process tool loopback and grants +# admin, and because the cookie auth path sets `current_user` to the raw +# username, an account literally named "internal-tool" would be silently +# treated as an admin by every `require_admin`-gated route. "api" collides with +# the bearer-token owner-attribution sentinel. 
"demo"/"system" round out the +# synthetic-owner set the rest of the codebase already special-cases (see +# `_SYNTHETIC_OWNERS` in routes/assistant_routes.py and the matching guards in +# src/task_scheduler.py / routes/research_routes.py) — a real account with one +# of those names would be denied an assistant and inconsistently owner-scoped. +# Refuse to create or rename into any of them so the sentinels can't be +# impersonated. (Keep this in sync with that synthetic-owner set.) +RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"}) + def _hash_password(password: str) -> str: return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8") @@ -60,6 +86,13 @@ class AuthManager: # Guards mutations of self._sessions and the on-disk sessions.json. # Validate/create/revoke run concurrently from the FastAPI threadpool. self._sessions_lock = threading.RLock() + # Guards all mutations of self._config and the on-disk auth.json so + # concurrent create/delete/rename/privilege operations don't interleave + # and corrupt the user database. + self._config_lock = threading.Lock() + # Guards the first-run setup check-and-write so concurrent requests + # cannot both observe is_configured==False and both create admin accounts. + self._setup_lock = threading.Lock() self._load() self._load_sessions() self._migrate_single_user() @@ -70,6 +103,15 @@ class AuthManager: if os.path.exists(self.auth_path): with open(self.auth_path, "r", encoding="utf-8") as f: self._config = json.load(f) + # Normalize all stored usernames to lowercase so they match + # the .strip().lower() applied at login/verify time. Fixes + # "Invalid credentials" when auth.json was written with + # mixed-case keys (e.g. via manual edit or a future migration). 
+ if "users" in self._config: + self._config["users"] = { + k.strip().lower(): v + for k, v in self._config["users"].items() + } logger.info("Auth config loaded") else: self._config = {} @@ -144,8 +186,9 @@ class AuthManager: @signup_enabled.setter def signup_enabled(self, value: bool): - self._config["signup_enabled"] = value - self._save() + with self._config_lock: + self._config["signup_enabled"] = value + self._save() @property def is_configured(self) -> bool: @@ -157,24 +200,31 @@ class AuthManager: def setup(self, username: str, password: str) -> bool: """First-run admin setup. Only works if no users exist.""" - if self.is_configured: - return False - return self.create_user(username, password, is_admin=True) + with self._setup_lock: + if self.is_configured: + return False + return self.create_user(username, password, is_admin=True) def create_user(self, username: str, password: str, is_admin: bool = False) -> bool: """Create a new user account.""" username = username.strip().lower() - if username in self.users: + if not username: return False - if "users" not in self._config: - self._config["users"] = {} - self._config["users"][username] = { - "password_hash": _hash_password(password), - "created": time.time(), - "is_admin": is_admin, - "privileges": dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES), - } - self._save() + if username in RESERVED_USERNAMES: + logger.warning("Refused to create reserved username '%s'", username) + return False + with self._config_lock: + if username in self.users: + return False + if "users" not in self._config: + self._config["users"] = {} + self._config["users"][username] = { + "password_hash": _hash_password(password), + "created": time.time(), + "is_admin": is_admin, + "privileges": dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES), + } + self._save() logger.info(f"Created user '{username}' (admin={is_admin})") return True @@ -187,14 +237,15 @@ class AuthManager: their cookie expired naturally (default ~30 
days). """ username = username.strip().lower() - if username not in self.users: - return False - if username == requesting_user: - return False - if not self.users.get(requesting_user, {}).get("is_admin"): - return False - del self._config["users"][username] - self._save() + with self._config_lock: + if username not in self.users: + return False + if username == requesting_user: + return False + if not self.users.get(requesting_user, {}).get("is_admin"): + return False + del self._config["users"][username] + self._save() # Purge all sessions belonging to this user. validate_token doesn't # cross-check `self.users`, so without this step a deleted user's # cookie keeps authenticating. @@ -207,6 +258,18 @@ class AuthManager: revoked += 1 if revoked: self._save_sessions() + # Also revoke API bearer tokens owned by this user. The bearer auth + # path authenticates straight against ApiToken rows and never + # re-checks that the owner still exists, so leaving the rows behind + # would let a deleted user keep full API access indefinitely. 
+ try: + from core.database import get_db_session, ApiToken + with get_db_session() as db: + removed = db.query(ApiToken).filter(ApiToken.owner == username).delete() + if removed: + logger.info(f"Revoked {removed} API token(s) owned by deleted user '{username}'") + except Exception: + logger.warning(f"Failed to revoke API tokens for deleted user '{username}'") logger.info(f"Deleted user '{username}' (by {requesting_user}); revoked {revoked} active session(s)") return True @@ -217,19 +280,24 @@ class AuthManager: requesting_user = (requesting_user or "").strip().lower() if not old_username or not new_username: return False - if old_username not in self.users: + if new_username in RESERVED_USERNAMES: + logger.warning("Refused to rename '%s' into reserved username '%s'", old_username, new_username) return False - if new_username in self.users: - return False - if not self.users.get(requesting_user, {}).get("is_admin"): - return False - self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username) - self._save() + with self._config_lock: + if old_username not in self.users: + return False + if new_username in self.users: + return False + if not self.users.get(requesting_user, {}).get("is_admin"): + return False + self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username) + self._save() renamed_sessions = 0 with self._sessions_lock: for sess in self._sessions.values(): - if (sess or {}).get("username") == old_username: + sess_user = str((sess or {}).get("username") or "").strip().lower() + if sess_user == old_username: sess["username"] = new_username renamed_sessions += 1 if renamed_sessions: @@ -261,17 +329,18 @@ class AuthManager: def set_privileges(self, username: str, privileges: Dict[str, Any]) -> bool: """Update privileges for a user. 
Can't modify admin privileges.""" username = username.strip().lower() - if username not in self.users: - return False - if self.users[username].get("is_admin"): - return False # admins always have full access - # Only allow known privilege keys - current = self.get_privileges(username) - for k, v in privileges.items(): - if k in DEFAULT_PRIVILEGES: - current[k] = v - self._config["users"][username]["privileges"] = current - self._save() + with self._config_lock: + if username not in self.users: + return False + if self.users[username].get("is_admin"): + return False # admins always have full access + # Only allow known privilege keys + current = self.get_privileges(username) + for k, v in privileges.items(): + if k in DEFAULT_PRIVILEGES: + current[k] = v + self._config["users"][username]["privileges"] = current + self._save() logger.info(f"Updated privileges for '{username}': {current}") return True @@ -281,8 +350,9 @@ class AuthManager: return False if not _verify_password(current_password, self.users[username]["password_hash"]): return False - self._config["users"][username]["password_hash"] = _hash_password(new_password) - self._save() + with self._config_lock: + self._config["users"][username]["password_hash"] = _hash_password(new_password) + self._save() return True # ------------------------------------------------------------------ @@ -300,8 +370,9 @@ class AuthManager: if username not in self.users: return None secret = pyotp.random_base32() - self._config["users"][username]["totp_secret_pending"] = secret - self._save() + with self._config_lock: + self._config["users"][username]["totp_secret_pending"] = secret + self._save() return secret def totp_get_provisioning_uri(self, username: str, secret: str) -> str: @@ -320,13 +391,14 @@ class AuthManager: if not totp.verify(code, valid_window=1): return False # Enable 2FA - self._config["users"][username]["totp_secret"] = secret - self._config["users"][username]["totp_enabled"] = True - 
self._config["users"][username].pop("totp_secret_pending", None) - # Generate backup codes - backup = [secrets.token_hex(4) for _ in range(8)] - self._config["users"][username]["totp_backup_codes"] = backup - self._save() + with self._config_lock: + self._config["users"][username]["totp_secret"] = secret + self._config["users"][username]["totp_enabled"] = True + self._config["users"][username].pop("totp_secret_pending", None) + # Generate backup codes + backup = [secrets.token_hex(4) for _ in range(8)] + self._config["users"][username]["totp_backup_codes"] = backup + self._save() logger.info(f"2FA enabled for '{username}'") return True @@ -338,13 +410,17 @@ class AuthManager: return True # 2FA not enabled, always pass secret = user.get("totp_secret") if not secret: - return True + # 2FA is enabled but no secret is stored (corrupt/partially-written + # auth.json). Fail closed — returning True here bypassed the second + # factor entirely. + return False # Check backup codes first backup = user.get("totp_backup_codes", []) if code in backup: - backup.remove(code) - self._config["users"][username]["totp_backup_codes"] = backup - self._save() + with self._config_lock: + backup.remove(code) + self._config["users"][username]["totp_backup_codes"] = backup + self._save() logger.info(f"Backup code used for '{username}' ({len(backup)} remaining)") return True totp = pyotp.TOTP(secret) @@ -355,11 +431,12 @@ class AuthManager: username = username.strip().lower() if not self.verify_password(username, password): return False - self._config["users"][username].pop("totp_secret", None) - self._config["users"][username].pop("totp_secret_pending", None) - self._config["users"][username].pop("totp_backup_codes", None) - self._config["users"][username]["totp_enabled"] = False - self._save() + with self._config_lock: + self._config["users"][username].pop("totp_secret", None) + self._config["users"][username].pop("totp_secret_pending", None) + 
def revoke_user_sessions(self, username: str, except_token: Optional[str] = None) -> int:
    """Drop every stored session that belongs to ``username``.

    Args:
        username: Account whose sessions are revoked; it is stripped and
            lower-cased before being compared with each session's
            ``"username"`` entry.
        except_token: Session token to keep (for example the caller's own
            session). ``None`` keeps nothing.

    Returns:
        The number of sessions that were removed.
    """
    target = username.strip().lower()
    with self._sessions_lock:
        doomed = []
        for tok, sess in self._sessions.items():
            if tok == except_token:
                continue
            # A stored session may be None/empty; treat it as ownerless.
            if (sess or {}).get("username") != target:
                continue
            doomed.append(tok)
        for tok in doomed:
            self._sessions.pop(tok, None)
        count = len(doomed)
        # Only touch the session store on disk when something changed.
        # NOTE(review): persistence happens while the lock is still held —
        # confirm _save_sessions() does not try to re-acquire it.
        if count:
            self._save_sessions()
    return count
-APP_VERSION = "0.9.1" - -# Base paths -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/" -STATIC_DIR = os.path.join(BASE_DIR, "static") -DATA_DIR = os.path.join(BASE_DIR, "data") - -# Data file paths -SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json") -MEMORY_FILE = os.path.join(DATA_DIR, "memory.json") -MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md") -PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs") -RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook") -UPLOAD_DIR = os.path.join(DATA_DIR, "uploads") -FEATURES_FILE = os.path.join(DATA_DIR, "features.json") -SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json") - -# API Configuration -MAX_CONTEXT_MESSAGES = 90 -REQUEST_TIMEOUT = 20 -OPENAI_COMPAT_PATH = "/v1/chat/completions" - -# Environment variables with defaults -DEFAULT_HOST = os.getenv("LLM_HOST", "localhost") -LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()] -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080') - - -# Cleanup configuration -CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true" -CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24")) - -# Default parameters -DEFAULT_TEMPERATURE = 1.0 -DEFAULT_MAX_TOKENS = 0 +Historically there were two copies of this module (this one lagged behind at +APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To +kill the drift, this now simply re-exports everything from src.constants so +there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR. +internal_api_base() also lives in src.constants now and is re-exported here so +existing `from core.constants import internal_api_base` callers keep working. 
def utcnow_naive() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    The clock is read as timezone-aware UTC and the tzinfo is then stripped,
    so the value can be stored in the existing (naive) DateTime columns.
    """
    aware_now = datetime.now(tz=timezone.utc)
    return aware_now.replace(tzinfo=None)
# Registered on the Engine *class*, so the hook runs for every Engine created
# in this process, not only the primary application engine.
# Non-SQLite DBAPI connections are left untouched, which keeps the
# PRAGMA foreign_keys=ON setup a no-op on other database backends.
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Turn on foreign-key enforcement for each new SQLite connection."""
    if not isinstance(dbapi_connection, sqlite3.Connection):
        return
    pragma_cursor = dbapi_connection.cursor()
    pragma_cursor.execute("PRAGMA foreign_keys=ON")
    pragma_cursor.close()
nullable=True) # JSON list of model IDs that failed probing cached_models = Column(Text, nullable=True) # JSON list of last-known model IDs (avoids probe on list) + pinned_models = Column(Text, nullable=True) # JSON list of admin-pinned model IDs (manual, may not appear in /v1/models) model_type = Column(String, nullable=True, default="llm") # "llm" or "image" + # auto = classify by URL; local = self-hosted server; api/proxy = external + # OpenAI-compatible API even when reachable through a private/tailnet IP. + endpoint_kind = Column(String, nullable=True, default="auto") + # auto = background refresh with TTL/backoff; manual/disabled = cached-first + # only unless an explicit endpoint probe is requested. + model_refresh_mode = Column(String, nullable=True, default="auto") + model_refresh_interval = Column(Integer, nullable=True, default=None) + model_refresh_timeout = Column(Integer, nullable=True, default=None) # Whether models on this endpoint accept OpenAI-style function # schemas + emit `tool_calls`. Auto-detected at Cookbook auto- # register time from `--enable-auto-tool-choice` in the serve cmd; @@ -330,6 +361,24 @@ class ModelEndpoint(TimestampMixin, Base): # is the historical default. When non-null, the model picker only shows # the endpoint to that user (admins always see everything). owner = Column(String, nullable=True, index=True) + # Optional OAuth/session-backed credential row. Used by subscription-backed + # providers that need refresh tokens instead of a static API key. 
class ProviderAuthSession(TimestampMixin, Base):
    """Encrypted OAuth/session credentials for refresh-aware model providers.

    One row holds the token pair for a provider login. ``access_token`` and
    ``refresh_token`` are declared as ``EncryptedText``; every other column is
    a plain String/DateTime. ``created_at``/``updated_at`` come from
    ``TimestampMixin``.

    NOTE(review): rows are presumably referenced by
    ``ModelEndpoint.provider_auth_id``; no ForeignKey is declared here, so
    confirm how that link is maintained.
    """
    __tablename__ = "provider_auth_sessions"

    id = Column(String, primary_key=True, index=True)
    # Provider identifier; required and indexed.
    provider = Column(String, nullable=False, index=True)
    # Optional owning username; NULL is allowed.
    owner = Column(String, nullable=True, index=True)
    # Optional display label.
    label = Column(String, nullable=True)
    # Provider base URL; required.
    base_url = Column(String, nullable=False)
    # Token pair, stored through the EncryptedText column type.
    access_token = Column(EncryptedText, nullable=True)
    refresh_token = Column(EncryptedText, nullable=True)
    # When the tokens were last refreshed; NULL until a refresh is recorded.
    last_refresh = Column(DateTime, nullable=True)
    # Free-form auth mode marker; valid values are not visible here.
    auth_mode = Column(String, nullable=True)
def _migrate_add_provider_auth_id_column():
    """Add ``provider_auth_id`` (plus its index) to ``model_endpoints``.

    Best-effort and idempotent: does nothing when the SQLite file or the
    table is missing, and any failure is logged as a warning rather than
    raised.
    """
    import sqlite3
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
    try:
        conn = sqlite3.connect(db_path)
        try:
            columns = [row[1] for row in conn.execute("PRAGMA table_info(model_endpoints)")]
            if not columns:
                # Table does not exist yet; nothing to migrate.
                return
            added = "provider_auth_id" not in columns
            if added:
                conn.execute("ALTER TABLE model_endpoints ADD COLUMN provider_auth_id VARCHAR")
            # Run unconditionally (IF NOT EXISTS makes it a no-op) so a
            # previous run that added the column but died before creating the
            # index is repaired on the next start.
            conn.execute(
                "CREATE INDEX IF NOT EXISTS ix_model_endpoints_provider_auth_id "
                "ON model_endpoints(provider_auth_id)"
            )
            conn.commit()
            if added:
                logging.getLogger(__name__).info("Migrated: added 'provider_auth_id' column + index to model_endpoints")
        finally:
            # Close even when a statement raised; the original leaked the
            # connection on the error path.
            conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_endpoints.provider_auth_id migration failed: {e}")
def _migrate_add_model_endpoint_refresh_columns():
    """Add endpoint classification / refresh policy columns if missing.

    Adds ``endpoint_kind``, ``model_refresh_mode``, ``model_refresh_interval``
    and ``model_refresh_timeout`` to ``model_endpoints``. Best-effort and
    idempotent: does nothing when the SQLite file or the table is missing,
    and failures are logged as warnings rather than raised.
    """
    import sqlite3
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
    # (column name, SQL type/default clause), in the order they are added.
    wanted = (
        ("endpoint_kind", "TEXT DEFAULT 'auto'"),
        ("model_refresh_mode", "TEXT DEFAULT 'auto'"),
        ("model_refresh_interval", "INTEGER"),
        ("model_refresh_timeout", "INTEGER"),
    )
    try:
        conn = sqlite3.connect(db_path)
        try:
            existing = {row[1] for row in conn.execute("PRAGMA table_info(model_endpoints)")}
            if existing:  # an empty set means the table does not exist
                for column, ddl in wanted:
                    if column not in existing:
                        # Identifiers come from the constant table above,
                        # never from user input.
                        conn.execute(f"ALTER TABLE model_endpoints ADD COLUMN {column} {ddl}")
                conn.commit()
        finally:
            # Close even when a statement raised; the original leaked the
            # connection on the error path.
            conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"model_endpoints refresh-policy migration failed: {e}")
def _migrate_add_mcp_oauth_tokens_column():
    """Add the ``oauth_tokens`` column to ``mcp_servers`` when it is absent.

    The ORM model declares the column as EncryptedText, yet the DDL here uses
    plain TEXT on purpose: EncryptedText is a SQLAlchemy TypeDecorator that
    encrypts in Python and stores the ciphertext as TEXT, so TEXT is the real
    storage type. Failures are logged as warnings, never raised.
    """
    log = logging.getLogger(__name__)
    try:
        with engine.connect() as connection:
            table_info = connection.execute(text("PRAGMA table_info(mcp_servers)"))
            existing = [row[1] for row in table_info]
            if "oauth_tokens" in existing:
                return
            connection.execute(text("ALTER TABLE mcp_servers ADD COLUMN oauth_tokens TEXT"))
            connection.commit()
            log.info("Added oauth_tokens column to mcp_servers")
    except Exception as exc:
        log.warning(f"oauth_tokens migration: {exc}")
+ origin = Column(String, nullable=True, index=True) calendar = relationship("CalendarCal", back_populates="events") @@ -1433,7 +1569,7 @@ def _migrate_seed_email_account(): import json as _json import uuid as _uuid from pathlib import Path - settings_file = Path("data/settings.json") + settings_file = Path(SETTINGS_FILE) if not settings_file.exists(): return try: @@ -1446,7 +1582,7 @@ def _migrate_seed_email_account(): if not imap_host and not smtp_host: return # nothing to migrate - now = datetime.utcnow() + now = utcnow_naive() with engine.begin() as conn: conn.execute(text(""" INSERT INTO email_accounts @@ -1483,6 +1619,10 @@ def _migrate_seed_email_account(): logging.getLogger(__name__).warning(f"seed email account migration: {e}") +# WARNING: Foreign-key enforcement is enabled globally for all SQLite connections. +# Any future migrations or schema changes that temporarily violate foreign-key +# constraints will fail. To perform such operations, foreign_keys must be +# temporarily disabled around the migration workflow. def init_db(): """ Initialize the database by creating all tables. 
def _migrate_backfill_task_folders():
    """Tag older task/research sessions with ``folder='Tasks'``.

    Only rows whose ``folder`` is NULL or empty and whose ``name`` starts with
    ``[Task] `` or ``[Research] `` are updated, so running it again changes
    nothing. Skipped entirely when ``sessions`` has no ``folder`` column.
    Failures are logged as warnings, never raised.
    """
    log = logging.getLogger(__name__)
    try:
        with engine.connect() as conn:
            session_columns = [row[1] for row in conn.execute(text("PRAGMA table_info(sessions)"))]
            if "folder" not in session_columns:
                return
            outcome = conn.execute(text(
                "UPDATE sessions SET folder = 'Tasks' "
                "WHERE (folder IS NULL OR folder = '') "
                "AND (name LIKE '[Task] %' OR name LIKE '[Research] %')"
            ))
            conn.commit()
            if outcome.rowcount:
                log.info(
                    f"Backfilled folder='Tasks' on {outcome.rowcount} task/research sessions")
    except Exception as exc:
        log.warning(f"task folder backfill: {exc}")
def _migrate_add_email_smtp_security():
    """Add explicit SMTP security mode for Proton Bridge/custom local SMTP.

    Adds ``email_accounts.smtp_security`` and, in the same run, derives a
    value for every pre-existing account from its port: 587 becomes
    ``'starttls'``, anything else (including NULL) becomes ``'ssl'``.

    Best-effort and idempotent: does nothing when the SQLite file or the
    table is missing or the column already exists, so values chosen later by
    the user are never overwritten. Failures are logged, never raised.
    """
    import sqlite3
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
    try:
        conn = sqlite3.connect(db_path)
        try:
            columns = [row[1] for row in conn.execute("PRAGMA table_info(email_accounts)")]
            if columns and "smtp_security" not in columns:
                conn.execute("ALTER TABLE email_accounts ADD COLUMN smtp_security TEXT DEFAULT 'ssl'")
                # ADD COLUMN ... DEFAULT makes every existing row read back as
                # 'ssl', so a "WHERE smtp_security IS NULL OR = ''" filter would
                # match nothing and port-587 accounts would stay on implicit
                # TLS. The column was created a line above, so every row is a
                # legacy row and must be backfilled unconditionally.
                conn.execute(
                    "UPDATE email_accounts SET smtp_security = CASE "
                    "WHEN COALESCE(smtp_port, 465) = 587 THEN 'starttls' "
                    "ELSE 'ssl' END"
                )
                conn.commit()
                logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts")
        finally:
            # Close even when a statement raised; the original leaked the
            # connection on the error path.
            conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}")
def _migrate_add_calendar_account_id():
    """Add `account_id` to calendars so each CalDAV-backed calendar knows which
    credential set (from caldav_accounts in user prefs) owns it.

    Best-effort and idempotent: does nothing when the SQLite file or the
    table is missing, and any failure is logged as a warning rather than
    raised.
    """
    import sqlite3
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
    try:
        conn = sqlite3.connect(db_path)
        try:
            columns = [row[1] for row in conn.execute("PRAGMA table_info(calendars)")]
            if not columns:
                # Table does not exist yet; nothing to migrate.
                return
            added = "account_id" not in columns
            if added:
                conn.execute("ALTER TABLE calendars ADD COLUMN account_id TEXT")
            # Run unconditionally (IF NOT EXISTS makes it a no-op) so a
            # previous run that added the column but died before creating the
            # index is repaired on the next start.
            conn.execute("CREATE INDEX IF NOT EXISTS ix_calendars_account_id ON calendars(account_id)")
            conn.commit()
            if added:
                logging.getLogger(__name__).info("Migrated: added 'account_id' column to calendars")
        finally:
            # Close even when a statement raised; the original leaked the
            # connection on the error path.
            conn.close()
    except Exception as e:
        logging.getLogger(__name__).warning(f"calendars.account_id migration failed: {e}")
-1705,7 +2016,7 @@ def cleanup_old_sessions(days: int = 30): from datetime import timedelta with get_db_session() as db: - cutoff_date = datetime.utcnow() - timedelta(days=days) + cutoff_date = utcnow_naive() - timedelta(days=days) deleted_count = db.query(Session).filter( Session.archived == True, @@ -1750,7 +2061,7 @@ def update_session_last_accessed(session_id: str): with get_db_session() as db: db_session = db.query(Session).filter(Session.id == session_id).first() if db_session: - db_session.last_accessed = datetime.utcnow() + db_session.last_accessed = utcnow_naive() db.commit() return True return False @@ -1787,6 +2098,32 @@ def get_session_by_id(session_id: str): with get_db_session() as db: return db.query(Session).filter(Session.id == session_id).first() +def get_upcoming_events(owner, horizon_days: int = 60, limit: int = 40): + """Upcoming, non-cancelled events as {uid, title, start} dicts, soonest first. + + owner=None means NO owner scoping (single-user / legacy). Multi-user callers + MUST pass the owning username — otherwise they read every tenant's events. 
def is_cors_preflight(method: str, headers) -> bool:
    """Return True for a genuine CORS preflight request.

    A preflight is an OPTIONS request carrying the
    Access-Control-Request-Method header. Such requests are credential-less
    by design and must reach CORSMiddleware to be answered -- gating them on
    auth 401s the preflight and breaks every cross-origin browser/WebView
    client. Pure so it can be unit-tested without standing up the app.

    Args:
        method: HTTP method exactly as received (compared case-sensitively).
        headers: Any container of header names. Case-insensitive mappings
            work directly; plain dicts and other iterables of names are also
            matched without regard to case, because HTTP field names are
            case-insensitive.

    Returns:
        True only when both conditions hold.
    """
    if method != "OPTIONS":
        return False
    wanted = "access-control-request-method"
    # Fast path: lowercase keys or a case-insensitive mapping.
    if wanted in headers:
        return True
    # Fallback for case-sensitive containers such as a plain dict holding
    # "Access-Control-Request-Method".
    return any(isinstance(name, str) and name.lower() == wanted for name in headers)
try: - if request.headers.get(INTERNAL_TOOL_HEADER) == INTERNAL_TOOL_TOKEN: + hdr = request.headers.get(INTERNAL_TOOL_HEADER) + if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN): return if getattr(request.state, "current_user", None) == "internal-tool": return @@ -57,11 +67,22 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # Tool render endpoints are served inside iframes — allow framing by self is_tool_render = path.startswith("/api/tools/") and path.endswith("/render") + # PDF previews are embedded by the in-app document library. Keep the + # exception route-scoped so normal app pages remain unframeable. + is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf") # Visual report pages are self-contained HTML — need inline scripts + external images is_report = path.startswith("/api/research/report/") response.headers["X-Content-Type-Options"] = "nosniff" response.headers["Referrer-Policy"] = "no-referrer" + response.headers["Permissions-Policy"] = "camera=(), microphone=(self), geolocation=()" + + is_https = ( + request.url.scheme == "https" + or request.headers.get("X-Forwarded-Proto") == "https" + ) + if is_https: + response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains" if is_report: response.headers["Content-Security-Policy"] = ( @@ -78,6 +99,12 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # sandbox="allow-scripts" attribute provides isolation. # Don't overwrite the route's own restrictive CSP either. pass + elif is_document_pdf_preview: + response.headers["X-Frame-Options"] = "SAMEORIGIN" + response.headers["Content-Security-Policy"] = ( + "default-src 'none'; " + "frame-ancestors 'self'" + ) else: response.headers["X-Frame-Options"] = "DENY" # NOTE: `style-src 'unsafe-inline'` is intentionally retained. 
diff --git a/core/models.py b/core/models.py index 6914b20a4..1adae65ed 100644 --- a/core/models.py +++ b/core/models.py @@ -76,8 +76,20 @@ class Session: _session_manager._persist_message(self.id, message) def get_context_messages(self) -> List[Dict[str, Any]]: - """Get messages in format for LLM API.""" - return [msg.to_dict() for msg in self.history] + """Get messages in format for LLM API. + + Slash-command / setup replies are persisted to history so they render + in the transcript, but they are UI chatter (e.g. ``/setup ...`` and its + status lines) the user never meant as conversation. They carry + ``metadata.source == "slash"``; exclude them here so they never reach + the model. Display/history-load paths use the raw ``history`` and are + unaffected. + """ + return [ + msg.to_dict() + for msg in self.history + if (msg.metadata or {}).get("source") != "slash" + ] def get(self, key: str, default=None): """Dict-like access for compatibility.""" diff --git a/core/platform_compat.py b/core/platform_compat.py index 01ebe325e..3eda4a107 100644 --- a/core/platform_compat.py +++ b/core/platform_compat.py @@ -14,13 +14,26 @@ Design rules: from __future__ import annotations import os +import ntpath import shutil import subprocess from pathlib import Path +import sys from typing import List, Optional +import platform IS_WINDOWS = os.name == "nt" IS_POSIX = not IS_WINDOWS +# Allows APFEL support and ARM-native binary recommendations on Apple Silicon Macs. +IS_APPLE_SILICON = ( + IS_POSIX + and platform.system() == "Darwin" + and platform.machine().lower() + in { + "arm64", + "aarch64", + } +) # ── File permissions ──────────────────────────────────────────────────────── @@ -52,9 +65,8 @@ def detached_popen_kwargs() -> dict: and is detached from any console. 
""" if IS_WINDOWS: - flags = ( - getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) - | getattr(subprocess, "DETACHED_PROCESS", 0x00000008) + flags = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) | getattr( + subprocess, "DETACHED_PROCESS", 0x00000008 ) return {"creationflags": flags} return {"start_new_session": True} @@ -134,11 +146,87 @@ _BASH_CACHE: Optional[str] = None _BASH_PROBED = False # Common Git-for-Windows install locations to probe when bash isn't on PATH. -_WINDOWS_BASH_FALLBACKS = ( - r"C:\Program Files\Git\bin\bash.exe", - r"C:\Program Files\Git\usr\bin\bash.exe", - r"C:\Program Files (x86)\Git\bin\bash.exe", +_WINDOWS_BASH_ROOT_ENV_VARS = ( + "ProgramFiles", + "ProgramW6432", + "ProgramFiles(x86)", + "LocalAppData", ) +_WINDOWS_BASH_DEFAULT_ROOTS = ( + r"C:\Program Files\Git", + r"C:\Program Files (x86)\Git", +) +_WINDOWS_BASH_RELATIVE_PATHS = ( + ("bin", "bash.exe"), + ("usr", "bin", "bash.exe"), +) + +# Paths to add to the remote SSH probe command to find tools like nvidia-smi that may not be on PATH. +_SSH_PATH_MEMBERS = ( + "/usr/bin", + "/usr/local/bin", + "/usr/local/cuda/bin", + "/usr/lib/wsl/lib" +) +# Fallback locations for nvidia-smi on WSL and other Linux distros where it may not be on PATH. 
+NVIDIA_PATH_CANDIDATES = ( + "/usr/bin/nvidia-smi", + "/usr/local/bin/nvidia-smi", + "/usr/local/cuda/bin/nvidia-smi", + "/usr/lib/wsl/lib/nvidia-smi", +) + + +def _ssh_path_override() -> str: + """Build the PATH export snippet used for remote SSH shell probes.""" + return f"export PATH=\"$PATH:{':'.join(_SSH_PATH_MEMBERS)}\"; " + + +SSH_PATH_OVERRIDE = _ssh_path_override() + + +def _windows_bash_fallbacks() -> List[str]: + roots: List[str] = [] + for env_name in _WINDOWS_BASH_ROOT_ENV_VARS: + base = os.environ.get(env_name) + if base: + roots.append(ntpath.join(base, "Git")) + roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS) + + paths: List[str] = [] + seen = set() + for root in roots: + for rel in _WINDOWS_BASH_RELATIVE_PATHS: + path = ntpath.join(root, *rel) + key = path.lower() + if key not in seen: + seen.add(key) + paths.append(path) + return paths + + +def _is_windows_bash_stub(path: str) -> bool: + lowered = path.lower() + return ( + "system32\\bash.exe" in lowered + or "sysnative\\bash.exe" in lowered + or "windowsapps\\bash.exe" in lowered + ) + + +def git_bash_path(path: str | Path) -> str: + """Convert a path to POSIX style suitable for Git Bash on Windows. + + Transforms drive letters (e.g., 'C:\\path') to POSIX '/c/path', + and uses forward slashes. 
+ """ + p = Path(path) + p_str = p.as_posix() + if IS_WINDOWS and len(p_str) >= 2 and p_str[1] == ":": + drive = p_str[0].lower() + return f"/{drive}{p_str[2:]}" + return p_str + def find_bash() -> Optional[str]: @@ -153,9 +241,11 @@ def find_bash() -> Optional[str]: if _BASH_PROBED: return _BASH_CACHE _BASH_PROBED = True - found = shutil.which("bash") + found = which_tool("bash") + if found and IS_WINDOWS and _is_windows_bash_stub(found): + found = None if not found and IS_WINDOWS: - for cand in _WINDOWS_BASH_FALLBACKS: + for cand in _windows_bash_fallbacks(): if os.path.exists(cand): found = cand break @@ -201,3 +291,156 @@ def run_script_argv(script_path) -> List[str]: comspec = os.environ.get("ComSpec", "cmd.exe") return [comspec, "/c", str(script_path)] return ["sh", str(script_path)] + + +def is_wsl() -> bool: + """True if running inside Windows Subsystem for Linux (WSL).""" + import sys + if sys.platform.startswith("linux") or os.name == "posix": + try: + with open("/proc/version", "r") as f: + if "microsoft" in f.read().lower(): + return True + except Exception: + pass + return False + + +def translate_path(path_str: str) -> str: + """Translate a path (possibly a Windows path) to the current OS format. + + Particularly handles Windows paths (e.g. C:\\foo or C:/foo) when running + under WSL, translating them to /mnt/c/foo. + Also handles standard path normalization to avoid string breakages. 
+ """ + if not path_str: + return path_str + + if is_wsl(): + path_str = path_str.replace("\\", "/") + import re + m = re.match(r"^([a-zA-Z]):(.*)", path_str) + if m: + drive = m.group(1).lower() + rest = m.group(2) + if not rest.startswith("/"): + rest = "/" + rest + return f"/mnt/{drive}{rest}" + + try: + return str(Path(path_str).resolve()) + except Exception: + return path_str + + +def get_wsl_windows_user_profile() -> Optional[str]: + """Retrieve the Windows host User Profile path from inside WSL.""" + if not is_wsl(): + return None + try: + r = run_wsl_windows_powershell("Write-Output $env:USERPROFILE", timeout=5) + if r.returncode == 0 and r.stdout.strip(): + return translate_path(r.stdout.strip()) + except Exception: + pass + + try: + users_dir = "/mnt/c/Users" + if os.path.isdir(users_dir): + for entry in os.listdir(users_dir): + if entry not in ("All Users", "Default", "Default User", "desktop.ini", "Public"): + path = os.path.join(users_dir, entry) + if os.path.isdir(path): + return path + except Exception: + pass + return None + + +def _ssh_exec_argv( + remote: str, + ssh_port: str | None, + *, + remote_cmd: str | None = None, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, +) -> list[str]: + """Build a consistent ssh argv for remote command execution.""" + argv = ["ssh"] + if connect_timeout is not None: + argv.extend(["-o", f"ConnectTimeout={int(connect_timeout)}"]) + if strict_host_key_checking is not None: + argv.extend( + [ + "-o", + "StrictHostKeyChecking=yes" + if strict_host_key_checking + else "StrictHostKeyChecking=no", + ] + ) + if ssh_port and ssh_port != "22": + argv.extend(["-p", str(ssh_port)]) + argv.append(remote) + if remote_cmd is not None: + argv.append(remote_cmd) + return argv + + +def run_ssh_command( + remote: str, + ssh_port: str | None, + remote_cmd: str, + *, + timeout: float, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, + text: bool = True, +) -> 
subprocess.CompletedProcess: + """Run an ssh command with centralized timeout and stderr/stdout capture.""" + return subprocess.run( + _ssh_exec_argv( + remote, + ssh_port, + remote_cmd=remote_cmd, + connect_timeout=connect_timeout, + strict_host_key_checking=strict_host_key_checking, + ), + timeout=timeout, + capture_output=True, + text=text, + ) + + +def _windows_powershell_argv( + command: str, + *, + no_profile: bool = True, + non_interactive: bool = True, +) -> List[str]: + argv: List[str] = ["powershell.exe"] + if no_profile: + argv.append("-NoProfile") + if non_interactive: + argv.append("-NonInteractive") + argv.extend(["-Command", command]) + return argv + + +def run_wsl_windows_powershell( + command: str, + *, + timeout: float = 5, +) -> subprocess.CompletedProcess[str]: + """Run a PowerShell command on the Windows host from WSL. + + Raises ``RuntimeError`` when called outside WSL. + """ + + if not is_wsl(): + raise RuntimeError("run_wsl_windows_powershell is only supported in WSL") + return subprocess.run( + _windows_powershell_argv(command), + capture_output=True, + text=True, + timeout=timeout, + ) diff --git a/core/session_manager.py b/core/session_manager.py index e9a274097..ecc23e088 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -14,7 +14,7 @@ import logging from datetime import datetime, timezone, timedelta from typing import Dict, Optional -from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal +from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal, utcnow_naive from .models import Session, ChatMessage logger = logging.getLogger(__name__) @@ -29,6 +29,21 @@ def _message_timestamp_iso(value: Optional[datetime]) -> Optional[str]: return value.isoformat().replace("+00:00", "Z") +def _parse_msg_content(raw): + """Parse message content from DB — deserialises JSON arrays back to lists + (multimodal content with 
image/audio attachments).""" + if isinstance(raw, list): + return raw + if isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw: + try: + parsed = json.loads(raw) + if isinstance(parsed, list) and all(isinstance(p, dict) for p in parsed): + return parsed + except (json.JSONDecodeError, ValueError): + pass + return raw + + class SessionManager: """ Manages chat sessions with database persistence. @@ -119,7 +134,7 @@ class SessionManager: meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp)) history.append(ChatMessage( role=db_msg.role, - content=db_msg.content, + content=_parse_msg_content(db_msg.content), metadata=meta, )) else: @@ -134,7 +149,7 @@ class SessionManager: meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp)) history.append(ChatMessage( role=db_msg.role, - content=db_msg.content, + content=_parse_msg_content(db_msg.content), metadata=meta, )) @@ -187,30 +202,43 @@ class SessionManager: """Persist a single message to the database.""" db = SessionLocal() try: + db_session = db.query(DbSession).filter(DbSession.id == session_id).first() + if db_session is None: + # A stream/tool callback can outlive a session delete. Do not + # create a chat_messages row with no parent session; also drop + # any stale cached session so later writes fail closed too. + self.sessions.pop(session_id, None) + logger.warning("Dropping message for deleted session %s", session_id) + return + msg_id = str(uuid.uuid4()) msg_time = datetime.utcnow() if message.metadata is None: message.metadata = {} message.metadata.setdefault('timestamp', _message_timestamp_iso(msg_time)) + # Multimodal content (image/audio attachments) is a list — serialize + # to JSON so the Text column can store it. On reload, _db_to_session + # detects the JSON-array prefix and parses it back. 
+ _content = message.content + if isinstance(_content, list): + _content = json.dumps(_content) db_message = DbChatMessage( id=msg_id, session_id=session_id, role=message.role, - content=message.content, + content=_content, meta_data=json.dumps(message.metadata) if message.metadata else None, timestamp=msg_time, ) db.add(db_message) - db_session = db.query(DbSession).filter(DbSession.id == session_id).first() - if db_session: - db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0 - _now = datetime.now(timezone.utc) - db_session.last_accessed = _now - # Clean "last conversation" timestamp — only bumped here on a - # real message persist, so it powers an accurate "Last active" - # sort that ignores renames / model swaps / mere opens. - db_session.last_message_at = _now + db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0 + _now = datetime.now(timezone.utc) + db_session.last_accessed = _now + # Clean "last conversation" timestamp — only bumped here on a + # real message persist, so it powers an accurate "Last active" + # sort that ignores renames / model swaps / mere opens. + db_session.last_message_at = _now db.commit() @@ -245,7 +273,10 @@ class SessionManager: db_session = db.query(DbSession).filter(DbSession.id == session_id).first() if db_session: - db_session.message_count = keep_count + # keep_count can exceed the real message total (e.g. the AI tool + # defaults to keep_count=10 on a short session); message_count must + # track the rows that actually remain, not the requested cap. 
+ db_session.message_count = min(keep_count, len(db_messages)) db_session.updated_at = datetime.now(timezone.utc) db.commit() @@ -276,7 +307,15 @@ class SessionManager: id=msg_id, session_id=session_id, role=message.role, - content=message.content, + # Multimodal content (image/audio attachments) is a list; + # serialize to JSON so the Text column round-trips via + # _parse_msg_content. Storing the raw list let SQLAlchemy + # bind its single-quoted repr, which _parse_msg_content + # cannot parse (it looks for double-quoted "type"), so the + # attachment was destroyed on reload. Mirrors _persist_message. + content=(json.dumps(message.content) + if isinstance(message.content, list) + else message.content), meta_data=json.dumps(message.metadata) if message.metadata else None, timestamp=now + timedelta(microseconds=i), ) @@ -466,11 +505,17 @@ class SessionManager: db_session = db.query(DbSession).filter(DbSession.id == session_id).first() if db_session: db.delete(db_session) + + # Drop the in-memory copy even when there is no DB row. A "ghost" + # session lives only here (never persisted, or its row was removed + # out-of-band); without this it can never be cleared and keeps + # 404ing on every operation (issue #1044). + removed_in_memory = self.sessions.pop(session_id, None) is not None + + if db_session or removed_in_memory: + # Commit the document-detach / message-delete above (a no-op when + # the ghost had no rows) together with the session delete. 
db.commit() - - if session_id in self.sessions: - del self.sessions[session_id] - logger.info(f"Deleted session {session_id}") return True return False @@ -574,7 +619,7 @@ class SessionManager: try: all_sessions = db.query(DbSession).all() - cutoff_date = datetime.now(timezone.utc) - timedelta(days=auto_archive_days) + cutoff_date = utcnow_naive() - timedelta(days=auto_archive_days) for db_session in all_sessions: stats['total_checked'] += 1 diff --git a/docker-compose.gpu-amd.yml b/docker-compose.gpu-amd.yml new file mode 100644 index 000000000..b95dde1bf --- /dev/null +++ b/docker-compose.gpu-amd.yml @@ -0,0 +1,166 @@ +# Standalone AMD ROCm GPU Compose file for stack-management UIs (Portainer, +# Coolify, Dockhand, etc.) that accept only a single Compose file and do not +# reliably honor COMPOSE_FILE or multiple `-f` overlays. +# +# This is equivalent to: docker-compose.yml + docker/gpu.amd.yml. +# The base docker-compose.yml plus the docker/gpu.amd.yml overlay remain the +# source of truth — CLI users should keep using the COMPOSE_FILE overlay +# workflow. Keep this file in sync with both when either changes. +# +# Requires ROCm drivers on the host (kfd + DRI devices) and the host user +# running Docker in the `video` and `render` groups. Set RENDER_GID to your +# host's numeric render group id when needed. See docker/gpu.amd.yml for details. +services: + odysseus: + build: . + ports: + - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" + volumes: + - ./data:/app/data:z + - ./logs:/app/logs:z + # Cookbook remote-server SSH identity. Odysseus can generate a key here; + # add the shown public key to each remote server's authorized_keys. + - ./data/ssh:/app/.ssh:z + # Cookbook local model cache. Inside Docker, "Local" means the Odysseus + # container, so persist its HuggingFace cache under ./data/huggingface. + - ./data/huggingface:/app/.cache/huggingface:z + # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) 
+ # land under /app/.local for the odysseus user. Persist them so a + # container recreate does not silently remove installed serve engines. + - ./data/local:/app/.local:z + extra_hosts: + # Lets the container reach local services on the Docker host, including + # Ollama at http://host.docker.internal:11434. + - "host.docker.internal:host-gateway" + environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + - SEARXNG_INSTANCE=http://searxng:8080 + - CHROMADB_HOST=chromadb + - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} + # PUID / PGID — the user/group the container drops to before + # running uvicorn (entrypoint also chowns 
/app/data + /app/logs + # to match, so bind-mounted files stay editable from the host). + # 1000 is the default first user on most Linux installs. If your + # host user has a different id, override here or via .env, e.g.: + # PUID=1001 + # PGID=1001 + # Find yours with: id -u / id -g + - PUID=${PUID:-1000} + - PGID=${PGID:-1000} + depends_on: + searxng: + condition: service_healthy + chromadb: + condition: service_started + restart: unless-stopped + # AMD ROCm overlay (from docker/gpu.amd.yml). + devices: + - /dev/kfd + - /dev/dri + group_add: + - video + - ${RENDER_GID:-render} + + chromadb: + image: docker.io/chromadb/chroma:latest + ports: + - "${CHROMADB_BIND:-127.0.0.1}:8100:8000" + volumes: + - chromadb-data:/chroma/chroma + environment: + - ANONYMIZED_TELEMETRY=FALSE + restart: unless-stopped + + searxng: + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed + entrypoint: + - /bin/sh + - -c + - | + set -eu + if [ ! 
-s /etc/searxng/settings.yml ] || grep -q 'odysseus-local-searxng-json-2026-05-30\|__SEARXNG_SECRET__' /etc/searxng/settings.yml; then + secret="$${SEARXNG_SECRET:-}" + if [ -z "$$secret" ]; then + secret="$$(python -c 'import secrets; print(secrets.token_urlsafe(48))')" + fi + sed "s|__SEARXNG_SECRET__|$$secret|g" /tmp/searxng-settings.yml.template > /etc/searxng/settings.yml + fi + exec /usr/local/searxng/entrypoint.sh + ports: + - "127.0.0.1:8080:8080" + volumes: + - searxng-data:/etc/searxng + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z + environment: + - SEARXNG_BASE_URL=http://localhost:8080/ + - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. 
+ cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + healthcheck: + test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] + interval: 5s + timeout: 6s + retries: 20 + start_period: 10s + restart: unless-stopped + + ntfy: + image: docker.io/binwiederhier/ntfy + command: serve + ports: + - "${NTFY_BIND:-127.0.0.1}:8091:80" + volumes: + - ntfy-cache:/var/cache/ntfy + environment: + - NTFY_BASE_URL=${NTFY_BASE_URL:-http://localhost:8091} + restart: unless-stopped + +volumes: + searxng-data: + chromadb-data: + ntfy-cache: diff --git a/docker-compose.gpu-nvidia.yml b/docker-compose.gpu-nvidia.yml new file mode 100644 index 000000000..fa50896ba --- /dev/null +++ b/docker-compose.gpu-nvidia.yml @@ -0,0 +1,169 @@ +# Standalone NVIDIA GPU Compose file for stack-management UIs (Portainer, +# Coolify, Dockhand, etc.) that accept only a single Compose file and do not +# reliably honor COMPOSE_FILE or multiple `-f` overlays. +# +# This is equivalent to: docker-compose.yml + docker/gpu.nvidia.yml. +# The base docker-compose.yml plus the docker/gpu.nvidia.yml overlay remain +# the source of truth — CLI users should keep using the COMPOSE_FILE overlay +# workflow. Keep this file in sync with both when either changes. +# +# Requires the NVIDIA Container Toolkit on the host. See docker/gpu.nvidia.yml +# for setup details. +services: + odysseus: + build: . + ports: + - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" + volumes: + - ./data:/app/data:z + - ./logs:/app/logs:z + # Cookbook remote-server SSH identity. Odysseus can generate a key here; + # add the shown public key to each remote server's authorized_keys. + - ./data/ssh:/app/.ssh:z + # Cookbook local model cache. Inside Docker, "Local" means the Odysseus + # container, so persist its HuggingFace cache under ./data/huggingface. 
+ - ./data/huggingface:/app/.cache/huggingface:z + # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) + # land under /app/.local for the odysseus user. Persist them so a + # container recreate does not silently remove installed serve engines. + - ./data/local:/app/.local:z + extra_hosts: + # Lets the container reach local services on the Docker host, including + # Ollama at http://host.docker.internal:11434. + - "host.docker.internal:host-gateway" + environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + - SEARXNG_INSTANCE=http://searxng:8080 + - CHROMADB_HOST=chromadb + - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - 
SERPER_API_KEY=${SERPER_API_KEY:-} + # PUID / PGID — the user/group the container drops to before + # running uvicorn (entrypoint also chowns /app/data + /app/logs + # to match, so bind-mounted files stay editable from the host). + # 1000 is the default first user on most Linux installs. If your + # host user has a different id, override here or via .env, e.g.: + # PUID=1001 + # PGID=1001 + # Find yours with: id -u / id -g + - PUID=${PUID:-1000} + - PGID=${PGID:-1000} + # NVIDIA overlay (from docker/gpu.nvidia.yml). + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility + depends_on: + searxng: + condition: service_healthy + chromadb: + condition: service_started + restart: unless-stopped + # NVIDIA overlay (from docker/gpu.nvidia.yml). + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + chromadb: + image: docker.io/chromadb/chroma:latest + ports: + - "${CHROMADB_BIND:-127.0.0.1}:8100:8000" + volumes: + - chromadb-data:/chroma/chroma + environment: + - ANONYMIZED_TELEMETRY=FALSE + restart: unless-stopped + + searxng: + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed + entrypoint: + - /bin/sh + - -c + - | + set -eu + if [ ! 
-s /etc/searxng/settings.yml ] || grep -q 'odysseus-local-searxng-json-2026-05-30\|__SEARXNG_SECRET__' /etc/searxng/settings.yml; then + secret="$${SEARXNG_SECRET:-}" + if [ -z "$$secret" ]; then + secret="$$(python -c 'import secrets; print(secrets.token_urlsafe(48))')" + fi + sed "s|__SEARXNG_SECRET__|$$secret|g" /tmp/searxng-settings.yml.template > /etc/searxng/settings.yml + fi + exec /usr/local/searxng/entrypoint.sh + ports: + - "127.0.0.1:8080:8080" + volumes: + - searxng-data:/etc/searxng + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z + environment: + - SEARXNG_BASE_URL=http://localhost:8080/ + - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + healthcheck: + test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] + interval: 5s + timeout: 6s + retries: 20 + start_period: 10s + restart: unless-stopped + + ntfy: + image: docker.io/binwiederhier/ntfy + command: serve + ports: + - "${NTFY_BIND:-127.0.0.1}:8091:80" + volumes: + - ntfy-cache:/var/cache/ntfy + environment: + - NTFY_BASE_URL=${NTFY_BASE_URL:-http://localhost:8091} + restart: unless-stopped + +volumes: + searxng-data: + chromadb-data: + ntfy-cache: diff --git a/docker-compose.yml b/docker-compose.yml index 8b4817017..9841b1dca 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,30 +2,57 @@ services: odysseus: build: . 
ports: - - "${APP_PORT:-7000}:7000" + - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" volumes: - - ./data:/app/data - - ./logs:/app/logs + - ./data:/app/data:z + - ./logs:/app/logs:z # Cookbook remote-server SSH identity. Odysseus can generate a key here; # add the shown public key to each remote server's authorized_keys. - - ./data/ssh:/app/.ssh + - ./data/ssh:/app/.ssh:z # Cookbook local model cache. Inside Docker, "Local" means the Odysseus # container, so persist its HuggingFace cache under ./data/huggingface. - - ./data/huggingface:/app/.cache/huggingface + - ./data/huggingface:/app/.cache/huggingface:z # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) # land under /app/.local for the odysseus user. Persist them so a # container recreate does not silently remove installed serve engines. - - ./data/local:/app/.local + - ./data/local:/app/.local:z extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. 
- "host.docker.internal:host-gateway" - env_file: - - .env environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} - SEARXNG_INSTANCE=http://searxng:8080 - CHROMADB_HOST=chromadb - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} # PUID / PGID — the user/group the container drops to before # running uvicorn (entrypoint also chowns /app/data + /app/logs # to match, so bind-mounted files stay editable from the host). 
@@ -54,7 +81,12 @@ services: restart: unless-stopped searxng: - image: docker.io/searxng/searxng:latest + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed entrypoint: - /bin/sh - -c @@ -72,10 +104,24 @@ services: - "127.0.0.1:8080:8080" volumes: - searxng-data:/etc/searxng - - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z environment: - SEARXNG_BASE_URL=http://localhost:8080/ - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. + cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE healthcheck: test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] interval: 5s diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 1af879cdf..668018ac1 100644 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -56,13 +56,34 @@ done # Auto-set CUDA_HOME if a pip-installed nvcc is present, and disable the # FlashInfer JIT sampler — sampler only, no impact on attention path. # No-op when vllm isn't installed. 
-for cu in /app/.local/lib/python*/site-packages/nvidia/cu13; do +# +# Checked layouts (all are real pip-wheel install paths): +# nvidia/cu13 — nvidia-nvcc-cu13 (CUDA 13.x wheel style) +# nvidia/cu12 — nvidia-nvcc-cu12 (CUDA 12.x wheel style) +# nvidia/cuda_nvcc — nvidia-cuda-nvcc-cu12 (older cu12 sub-package style) +for cu in \ + /app/.local/lib/python*/site-packages/nvidia/cu13 \ + /app/.local/lib/python*/site-packages/nvidia/cu12 \ + /app/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do if [ -x "$cu/bin/nvcc" ]; then export CUDA_HOME="$cu" - export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}" break fi done +# Disable the FlashInfer JIT sampler unconditionally — it is sampler-only +# and has no impact on the attention path, but requires nvcc + matching +# CUDA headers at startup. Without this, vLLM crashes with "Could not find +# nvcc" even when the GPU itself is fully visible to the container. +export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}" + +# Make Cookbook-installed Python CLIs visible after `pip install --user`. +# vLLM and helper scripts land here because /app is the non-root user's HOME. +export PATH="/app/.local/bin:$PATH" + +# Run first-time setup as the app user so data/ files get the right ownership. +# setup.py is idempotent — skips auth.json / .env if they already exist. +# || true so a setup failure never prevents the container from starting. +gosu "$PUID:$PGID" python /app/setup.py || true # Drop root and run the actual app. `gosu` is preferred over `su` / # `sudo` because it cleans up the process tree (no extra shell layer) diff --git a/docker/gpu.amd.yml b/docker/gpu.amd.yml index 6a0ac396b..1bda9cfdd 100644 --- a/docker/gpu.amd.yml +++ b/docker/gpu.amd.yml @@ -1,5 +1,6 @@ # AMD ROCm GPU overlay. Enable by setting COMPOSE_FILE in .env: # COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml +# RENDER_GID= # # Requires ROCm drivers on the host (kfd + DRI devices). 
The host user # running Docker must be in the `video` and `render` groups. @@ -15,4 +16,4 @@ services: - /dev/dri group_add: - video - - render + - ${RENDER_GID:-render} diff --git a/docker/gpu.nvidia.yml b/docker/gpu.nvidia.yml index 32f7fb2dc..5590ba439 100644 --- a/docker/gpu.nvidia.yml +++ b/docker/gpu.nvidia.yml @@ -1,6 +1,11 @@ # NVIDIA GPU overlay. Enable by setting COMPOSE_FILE in .env: # COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml # +# Use scripts/check-docker-gpu.sh to diagnose GPU passthrough, optionally +# install the NVIDIA Container Toolkit (Ubuntu/Debian), and write COMPOSE_FILE +# to .env. The script is read-only by default — it installs nothing and never +# edits .env unless explicitly asked. +# # Requires the NVIDIA Container Toolkit on the host. # Arch: sudo pacman -S nvidia-container-toolkit # Debian: sudo apt install nvidia-container-toolkit diff --git a/docs/email-outlook.md b/docs/email-outlook.md new file mode 100644 index 000000000..1f8b97d5d --- /dev/null +++ b/docs/email-outlook.md @@ -0,0 +1,17 @@ +# Outlook / Office 365 email accounts + +Odysseus email accounts currently use IMAP and SMTP with username/password +authentication. That works for providers that still allow app passwords or +mailbox passwords for IMAP/SMTP. + +Microsoft disables basic authentication for Outlook and Microsoft 365 in most +modern accounts and tenants. If you try to add an Outlook account with a normal +password, Microsoft may return errors such as: + +- `IMAP: AUTHENTICATE failed` +- `SMTP: 535 5.7.139 Authentication unsuccessful, basic authentication is disabled` + +This is expected. Odysseus does not support Microsoft OAuth or Graph Mail yet, +so Outlook / Office 365 accounts cannot currently be added through the password +form. Use another email provider with app-password support, or track the future +Microsoft Graph OAuth integration. 
diff --git a/docs/index.html b/docs/index.html index 00b37d5a4..f740e0bb9 100644 --- a/docs/index.html +++ b/docs/index.html @@ -26,16 +26,15 @@ } * { box-sizing: border-box; } html { scroll-behavior: smooth; scroll-padding-top: 60px; } - /* REMOVED: "scroll-snap-type: y mandatory" + /* REMOVED: "scroll-snap-type: y proximity" The idea was: >>Each section is a full-viewport "page" with its content centered, so only one shows at a time and the snap is obvious.<< PROBLEM: sections easily grow taller than 100vh IRL This cause forced jumps mid-read. It's intrusive UX. + The landing-page is not a PowerPoint presentation! - Preserved: CSS snap-points to avoid destroying code meta-data - Less intrusive version: "scroll-snap-type: y proximity" - For now: fully removed (bad UX)*/ + Preserved: CSS snap-points to avoid destroying code meta-data*/ .hero, section { scroll-snap-align: start; min-height: 100vh; display: flex; flex-direction: column; justify-content: center; diff --git a/docs/pr-blocker-audit.md b/docs/pr-blocker-audit.md new file mode 100644 index 000000000..b56f28cb3 --- /dev/null +++ b/docs/pr-blocker-audit.md @@ -0,0 +1,188 @@ +# PR Blocker Audit + +`scripts/pr_blocker_audit.py` is a small, read-only triage helper for maintainers who need to inspect open pull request overlap before reviewing or starting related work. + +It is a triage helper, not a replacement for maintainer judgment. + +## What it does + +- Reads open PR metadata from a local JSON file or from `gh`. +- Reports files touched by more than one open PR. +- Groups active work into broad code areas. +- Ranks PRs with a deterministic heuristic score. +- Flags possible duplicate candidates based on title keyword overlap and changed-file similarity. +- Suggests quieter areas for conservative new work. +- Prints Markdown by default, compact terminal output when requested, or machine-readable JSON. + +## What it does not do + +- It does not post comments. 
+- It does not review, approve, label, close, merge, or otherwise mutate PRs. +- It does not add or run GitHub Actions. +- It does not import the Odysseus application package. +- It does not claim that a PR is definitely blocked or duplicated. + +## Read-only safety guarantee + +Offline mode only reads a local JSON file. Live mode runs read-only GitHub CLI commands: + +```bash +gh pr list --repo OWNER/REPO --state open --limit 1000 --json number,title,author,files,mergeStateStatus,reviewDecision,updatedAt,url +``` + +If a PR from that list has missing or empty changed-file metadata, live mode fills it with read-only per-PR REST calls: + +```bash +gh api --paginate "repos/OWNER/REPO/pulls/NUMBER/files?per_page=100" +``` + +If the GraphQL-backed `gh pr list` command itself fails, live mode falls back to listing open PRs through the REST API: + +```bash +gh api --paginate "repos/OWNER/REPO/pulls?state=open&per_page=100" +``` + +Per-PR file fetching makes live overlap results useful, but it can be slower on repositories with hundreds of open PRs. + +## Generate input JSON + +For repeatable offline audits, capture PR metadata first: + +```bash +gh pr list --repo OWNER/REPO --state open --limit 1000 --json number,title,author,files,mergeStateStatus,reviewDecision,updatedAt,url > open-prs.json +``` + +## Run offline mode + +```bash +python3 scripts/pr_blocker_audit.py --input open-prs.json +``` + +## Run live mode + +```bash +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO +``` + +Live mode fetches up to 1000 open PRs by default. Use `--limit` to cap how many open PRs are fetched and analyzed, and `--top` to cap how many rows are displayed in ranked sections: + +```bash +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO --limit 50 --top 10 +``` + +Live mode may take time on large PR queues because it fetches changed-file metadata for each PR that did not include it in the initial list response.
Progress is shown on `stderr` by default only when `stderr` is a TTY: + +```bash +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO --progress auto +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO --progress always +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO --progress never +``` + +Use `--quiet` to suppress progress and non-fatal warning output. Progress and warnings never go to `stdout`, so redirected reports and `--output` files remain clean. + +For a faster metadata-only scan, skip changed-file metadata entirely: + +```bash +python3 scripts/pr_blocker_audit.py --repo OWNER/REPO --no-fetch-files +``` + +## JSON output + +Use `--format json` for machine-readable output suitable for scripting or downstream tooling: + +```bash +python3 scripts/pr_blocker_audit.py --input open-prs.json --format json +python3 scripts/pr_blocker_audit.py --input open-prs.json --format json --output report.json +``` + +JSON output is stable and deterministic for the same input. It uses `sort_keys=True` so field order does not vary between runs. It never includes ANSI escape codes, even with `--color always`. Progress text is always `stderr`-only and never appears in JSON output. 
+ +The top-level object contains these keys: + +- `summary` — scalar overview: `total_prs_analyzed`, `unique_files_touched`, `prs_missing_changed_file_metadata`, `main_overlap_drivers`, `highest_risk_areas`, `recommended_first_review_target` +- `locked_areas` — list of objects with `area`, `files` (top paths as a string), `prs` (list of PR numbers), `why`, `priority` +- `hot_files` — list of objects with `file`, `pr_count`, `pr_numbers` (list of PR numbers); capped at `--top` +- `review_priorities` — ranked list with `rank`, `number`, `score`, `title`, `url`, `merge_state`, `review_decision`, `reasons` (list); capped at `--top` +- `duplicate_candidates` — list of objects with `pr_numbers` (list) and `titles` (list, one entry per PR in the group) +- `safer_areas` — list of strings + +## Write output to a file + +```bash +python3 scripts/pr_blocker_audit.py --input open-prs.json --output pr-blocker-report.md +python3 scripts/pr_blocker_audit.py --input open-prs.json --format json --output report.json +``` + +Markdown and JSON output never include ANSI color codes. ANSI codes are stripped defensively when writing any output file. + +## Terminal output and color + +Use terminal output for quick interactive scans: + +```bash +python3 scripts/pr_blocker_audit.py --input open-prs.json --format terminal +``` + +Terminal output includes locked areas, hot files, review / blocker priorities, possible duplicate candidates, and safer areas. + +Color is readability-only. It is never included in Markdown reports and is stripped defensively when writing output files. Color modes are: + +```bash +python3 scripts/pr_blocker_audit.py --input open-prs.json --format terminal --color auto +python3 scripts/pr_blocker_audit.py --input open-prs.json --format terminal --color always +python3 scripts/pr_blocker_audit.py --input open-prs.json --format terminal --color never +``` + +`--no-color` is kept as an alias for `--color never`. 
With `--color auto`, color is used only for terminal output on a TTY when `NO_COLOR` is not set and output is not being written to a file. + +## Interpret locked areas + +Locked areas are broad categories with one or more open PRs. An area is higher priority when several PRs touch it, when PRs share files, or when the highest scoring PR in that area has risk signals. Treat this as a prompt to inspect the PRs together. + +`PRs missing changed-file metadata` counts PRs that still had no changed-file paths after live file fetching, or PRs from offline input that did not include files. Those PRs can still appear in area summaries from title matching, but file overlap analysis is weaker for them. + +`Docs / tooling / tests` is conservative: runtime PRs are not classified there just because they include tests or README changes. Docs-only, README-only, scripts-only, tests-only, or strongly titled docs/tooling/test work still maps there. + +`Other / unclassified` is kept visible for PRs that do not match the area rules. When most of it comes from missing file metadata, the report summarizes that instead of letting long PR lists dominate the locked-area section. + +## Interpret duplicate candidates + +Duplicate candidates are labeled as possible duplicate / needs human review. The script groups PRs only when their file sets are highly similar and their titles share meaningful keywords. Similar PRs can still be complementary. + +## Interpret heuristic scores + +The review priority score is deterministic for the same input. 
Recency is measured against the newest parseable PR update timestamp in the input, and the score uses simple weights for: + +- direct auth, bearer-token, API-token, privilege, or permission lifecycle signals +- security, secret, or data exposure keywords +- persistence, migration, database, SQLite, or Postgres keywords +- memory, vector, RAG, embedding, or retrieval keywords +- overlapping changed files +- clean merge state as a small actionability signal +- review state +- recently updated PRs when timestamp data exists + +Higher scores mean "inspect earlier", not "correct" or "merge-ready". Broad PRs can score high because they overlap many files and may block other work, but they still need normal review and validation. + +Dirty, blocked, conflicting, and unknown merge states are shown as risk/caution reasons. They do not add importance points by themselves. + +## Design note: intentional single-script layout + +`pr_blocker_audit.py` is intentionally kept as one standalone script. The goal is to keep this maintainer/contributor workflow helper low-friction while broader repo tooling and test-suite conventions are still evolving. Splitting it into packages or modules is not ruled out, but is deferred until there is a clearer settled pattern to follow. + +## Limitations + +- Some PRs may still lack changed files if GitHub file metadata calls fail or metadata-only mode is used. +- Area classification is intentionally small and editable. +- Title keyword matching misses semantic duplicates. +- Heuristic scoring cannot know project strategy, reviewer availability, or hidden dependency chains. +- Empty or missing file metadata produces a valid report but weak overlap analysis. 
+ +## Validation + +```bash +python3 -m py_compile scripts/pr_blocker_audit.py tests/test_pr_blocker_audit.py +python3 -m pytest tests/test_pr_blocker_audit.py -q +python3 scripts/pr_blocker_audit.py --help +git diff --check +``` diff --git a/integrations/claude/README.md b/integrations/claude/README.md new file mode 100644 index 000000000..e2671f8c3 --- /dev/null +++ b/integrations/claude/README.md @@ -0,0 +1,36 @@ +# Odysseus Claude Code Integration + +This directory contains the Claude Code skill bundle for Odysseus. + +## User Flow + +1. Open Odysseus Settings > Integrations. +2. Add a Claude Agent. +3. Copy the full setup commands shown after the generated token. +4. Toggle the tools Claude is allowed to use. +5. Configure the terminal Claude Code session: + +```bash +export ODYSSEUS_URL=http://your-odysseus-host:7000 +export ODYSSEUS_API_TOKEN=ody_generated_token +mkdir -p ~/.claude +curl -fsSL -H "Authorization: Bearer $ODYSSEUS_API_TOKEN" "$ODYSSEUS_URL/api/claude/plugin.zip" -o /tmp/odysseus-claude-skill.zip +python3 -m zipfile -e /tmp/odysseus-claude-skill.zip ~/.claude/ +``` + +Claude Code auto-loads anything under `~/.claude/skills/`, so the `odysseus` skill is +available in any session that has `ODYSSEUS_URL` and `ODYSSEUS_API_TOKEN` in its +environment. + +## What's in the bundle + +- `skills/odysseus/SKILL.md` — the skill definition Claude Code reads. +- `skills/odysseus/scripts/odysseus_api.py` — small helper that calls the scoped + `/api/codex/*` endpoints (these are the canonical scope-gated agent API; the + `codex` path is historic and shared by all agent integrations). + +## Scope enforcement + +The token is scope-gated. Every tool surface is checked server-side in Odysseus, +so even if Claude tries to call a forbidden endpoint, it gets `403` until the +user enables the matching toggle in Settings > Integrations > Claude Agent. 
diff --git a/integrations/claude/skills/odysseus/SKILL.md b/integrations/claude/skills/odysseus/SKILL.md new file mode 100644 index 000000000..d3b55b3dd --- /dev/null +++ b/integrations/claude/skills/odysseus/SKILL.md @@ -0,0 +1,153 @@ +--- +name: odysseus +description: Use when the user asks Claude Code to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Claude Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN. +--- + +# Odysseus + +Use this skill when a user asks to interact with Odysseus from Claude Code. + +## Configuration + +Expect these environment variables: + +- `ODYSSEUS_URL`: Base URL for the user's Odysseus instance, for example `http://127.0.0.1:7000`. +- `ODYSSEUS_API_TOKEN`: Scoped API token created in Odysseus Settings > Integrations > Add Integration > Claude Agent. + +If either value is missing, do not guess credentials. Tell the user to create a Claude Agent token in Odysseus Settings and expose both values to the terminal session. + +## When to use what + +- **Reminder ("remind me at 5pm to do X")** → TODO with `due_date`. The due_date IS the reminder — it fires a notification automatically via the user's configured channel (browser/email/ntfy). **Do NOT create a calendar event for a reminder.** Creating a calendar event named "Reminder" does NOT trigger a notification — it's just a time block on the calendar. +- **Calendar event ("meeting at 3pm", "dentist Tuesday 10am")** → calendar event. Use for scheduled time blocks, meetings, appointments, recurring schedules. These show up on the calendar grid; reminders for them are configured separately in Odysseus settings. +- **Note / freeform info ("note that the wifi password is ...")** → memory or todo without a due_date (depending on whether it's a fact about the user or an action item). +- **Persistent fact / preference about the user** → memory. 
+ +If the user says "reminder" + a time, default to TODO with due_date. Only switch to calendar if the user explicitly says "calendar", "event", "meeting", "appointment", or describes a time *range*. + +## Safety + +- All Odysseus data access MUST go through the scoped HTTP API under `/api/codex/*` (the canonical scope-gated agent API, shared by all agent integrations). +- Check `/api/codex/capabilities` before using a tool surface. +- Treat `403` as an intentional Settings restriction. Do not work around it. +- Do not use SSH, Docker, direct Python imports, SQLite queries, MCP internals, browser cookies, or local files to read/write Odysseus user data. +- Do not call helpers like `do_manage_notes`, email MCP internals, or database sessions directly for user data, even if shell access exists. +- Never send email directly unless the user explicitly asks to send and the token has a send-capable scope. +- Keep actions scoped to the token owner. + +## Todos + +The scoped agent API supports todos/checklists: + +- `GET /api/codex/todos` +- `POST /api/codex/todos` + +Use the bundled helper script when available: + +```bash +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py capabilities +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py todos list +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py todos add "Follow up" +``` + +Supported todo actions are `list`, `add`, `update`, `delete`, and `toggle_item`. + +**Reminders (todos with a due date)** — the backend parses natural language. Send `due_date` in the body via the generic POST so the time becomes a structured reminder, NOT a literal substring inside the title. 
The `todos add TITLE` shortcut only sets the title, so use the POST form for anything with a time: + +```bash +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py POST /api/codex/todos '{"action":"add","title":"Call dentist","due_date":"tomorrow at 5pm"}' +``` + +The backend accepts both ISO timestamps and natural language like `"tomorrow 5pm"`, `"next Monday 9am"`, `"in 2 hours"`. It anchors to the user's timezone. + +## Email + +The scoped agent API supports email reads: + +- `GET /api/codex/emails?folder=INBOX&limit=10&offset=0&filter=all` +- `GET /api/codex/emails/{uid}?folder=INBOX` + +Use the bundled helper script when available: + +```bash +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py emails list 5 +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py emails read UID +``` + +If `/api/codex/capabilities` does not show `email.read: true`, do not inspect email. Ask the user to enable Email read in the Claude Agent settings. + +## Memory + +- `GET /api/codex/memory` — list memories for the token owner. +- `POST /api/codex/memory` — body `{"text": "...", "category": "fact", "source": "user", "session_id": null}`. Requires `memory:write`. +- `DELETE /api/codex/memory/{memory_id}` — remove a memory entry. Requires `memory:write`. + +```bash +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py GET /api/codex/memory +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py POST /api/codex/memory '{"text":"User prefers SI units","category":"preference"}' +``` + +## Calendar + +- `GET /api/codex/calendar/events?start=ISO&end=ISO` — list events in window. +- `POST /api/codex/calendar/events` — body matches `EventCreate` (`summary`, `dtstart`, `dtend`, `all_day`, `description`, `location`, `calendar_href`, `rrule`, `color`). Requires `calendar:write`. +- `DELETE /api/codex/calendar/events/{uid}` — delete event by uid (the value returned in the POST response). Requires `calendar:write`. 
+ +## Documents + +- `GET /api/codex/documents?search=...&limit=50` — paginated library. +- `GET /api/codex/documents/{doc_id}` — fetch one document. +- `POST /api/codex/documents` — body `{"session_id": "...", "title": "...", "content": "...", "language": "markdown"}`. Requires `documents:write`. +- `DELETE /api/codex/documents/{doc_id}` — delete a document. Requires `documents:write`. + +## Email draft + send + +- `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`). +- `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction. + +## Cookbook serve (debug a failing model launch) + +The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.). + +- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`. +- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`. +- `GET /api/codex/cookbook/cached?host=` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`. +- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`. 
+- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt + neofetch banner overwrites the pane. Default tail=400. Requires `cookbook:read`. +- `POST /api/codex/cookbook/serve` — launch a serve task. Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session for that task. Requires `cookbook:launch`. + +```bash +# Survey what's running +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook tasks + +# Tail the failing one (sessionId from `cookbook tasks`) +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400 + +# Stop the previous attempt before you try a new flag set +python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345 + +# Relaunch with new flags. cmd MUST begin with one of the allowlisted binaries. 
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook serve \ + /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \ + "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \ + pewds@192.168.1.12 +``` + +**Debug loop pattern:** when a serve is failing, the productive sequence is + +1. `cookbook tasks` → find the failing sessionId. +2. `cookbook output SID 600` → read the last 600 lines, find the actual root-cause line (often above the visible tail because tmux scrollback rolled — request a larger `tail` if the error references "above"). +3. `cookbook stop SID` — kill the previous attempt before relaunching; two serves on the same `--port` collide. +4. `cookbook serve repo "new cmd"` — try the next variation. Wait ~20s, then `cookbook output` on the new sessionId. + +**Hard limits this surface enforces:** +- `cookbook serve` cmd allowlist + shell-metacharacter rejection — you cannot run arbitrary shell, only model-server binaries. +- `cookbook stop` only targets task sessionIds matching `[a-zA-Z0-9_-]+`. +- The agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching, and check `cookbook tasks` for collisions on the same `--port` before launching. + +## Forbidden Bypass Pattern + +If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Claude Agent tool toggle instead. 
diff --git a/integrations/claude/skills/odysseus/scripts/odysseus_api.py b/integrations/claude/skills/odysseus/scripts/odysseus_api.py new file mode 100755 index 000000000..fcef8a777 --- /dev/null +++ b/integrations/claude/skills/odysseus/scripts/odysseus_api.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +"""Small Odysseus scoped API helper for Codex terminal sessions.""" + +from __future__ import annotations + +import json +import os +import sys +import urllib.error +import urllib.request + + +def _usage() -> int: + print("usage:", file=sys.stderr) + print(" odysseus_api.py capabilities", file=sys.stderr) + print(" odysseus_api.py todos list", file=sys.stderr) + print(" odysseus_api.py todos add TITLE", file=sys.stderr) + print(" odysseus_api.py emails list [limit]", file=sys.stderr) + print(" odysseus_api.py emails read UID", file=sys.stderr) + print(" odysseus_api.py cookbook tasks", file=sys.stderr) + print(" odysseus_api.py cookbook servers", file=sys.stderr) + print(" odysseus_api.py cookbook cached [HOST]", file=sys.stderr) + print(" odysseus_api.py cookbook presets", file=sys.stderr) + print(" odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr) + print(" odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr) + print(" odysseus_api.py cookbook preset NAME", file=sys.stderr) + print(" odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr) + print(" odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr) + print(" odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr) + return 2 + + +def _config() -> tuple[str, str] | None: + base_url = os.environ.get("ODYSSEUS_URL", "").strip().rstrip("/") + token = os.environ.get("ODYSSEUS_API_TOKEN", "").strip() + missing = [] + if not base_url: + missing.append("ODYSSEUS_URL") + if not token: + missing.append("ODYSSEUS_API_TOKEN") + if missing: + print(f"missing {', '.join(missing)}; create a Codex Agent token in Odysseus Settings", 
file=sys.stderr) + return None + return base_url, token + + +def main() -> int: + if len(sys.argv) < 2: + return _usage() + + command = sys.argv[1].lower() + if command == "capabilities": + method = "GET" + path = "/api/codex/capabilities" + body = None + elif command == "todos": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + path = "/api/codex/todos" + if action == "list": + method = "GET" + body = None + elif action == "add" and len(sys.argv) >= 4: + method = "POST" + body = json.dumps({"action": "add", "title": " ".join(sys.argv[3:])}) + else: + return _usage() + elif command == "emails": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + if action == "list": + method = "GET" + limit = sys.argv[3] if len(sys.argv) >= 4 else "10" + path = f"/api/codex/emails?folder=INBOX&limit={limit}&offset=0&filter=all" + body = None + elif action == "read" and len(sys.argv) >= 4: + method = "GET" + path = f"/api/codex/emails/{sys.argv[3]}" + body = None + else: + return _usage() + elif command == "cookbook": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + if action == "tasks": + method = "GET" + path = "/api/codex/cookbook/tasks" + body = None + elif action == "servers": + method = "GET" + path = "/api/codex/cookbook/servers" + body = None + elif action == "output" and len(sys.argv) >= 4: + method = "GET" + sid = sys.argv[3] + tail = sys.argv[4] if len(sys.argv) >= 5 else "400" + path = f"/api/codex/cookbook/output/{sid}?tail={tail}" + body = None + elif action == "cached": + method = "GET" + if len(sys.argv) >= 4: + from urllib.parse import quote + path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}" + else: + path = "/api/codex/cookbook/cached" + body = None + elif action == "presets": + method = "GET" + path = "/api/codex/cookbook/presets" + body = None + elif action == "preset" and len(sys.argv) >= 4: + from urllib.parse import quote + method = "POST" + path = 
f"/api/codex/cookbook/preset/{quote(sys.argv[3])}" + body = None + elif action == "adopt" and len(sys.argv) >= 5: + method = "POST" + path = "/api/codex/cookbook/adopt" + payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]} + if len(sys.argv) >= 6: payload["host"] = sys.argv[5] + if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6]) + body = json.dumps(payload) + elif action == "serve" and len(sys.argv) >= 5: + method = "POST" + path = "/api/codex/cookbook/serve" + payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]} + if len(sys.argv) >= 6: + payload["remote_host"] = sys.argv[5] + body = json.dumps(payload) + elif action == "stop" and len(sys.argv) >= 4: + method = "POST" + path = f"/api/codex/cookbook/stop/{sys.argv[3]}" + body = None + else: + return _usage() + else: + if len(sys.argv) < 3: + return _usage() + method = sys.argv[1].upper() + path = sys.argv[2] + body = sys.argv[3] if len(sys.argv) > 3 else None + + if not path.startswith("/"): + path = "/" + path + if not path.startswith("/api/codex/"): + print("refusing non-/api/codex path; use scoped Odysseus integration endpoints only", file=sys.stderr) + return 2 + + config = _config() + if config is None: + return 2 + base_url, token = config + + data = None + headers = { + "Accept": "application/json", + "Authorization": f"Bearer {token}", + } + if body is not None: + try: + parsed = json.loads(body) + except json.JSONDecodeError as exc: + print(f"invalid json body: {exc}", file=sys.stderr) + return 2 + data = json.dumps(parsed).encode("utf-8") + headers["Content-Type"] = "application/json" + + req = urllib.request.Request(base_url + path, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + print(resp.read().decode("utf-8")) + return 0 + except urllib.error.HTTPError as exc: + text = exc.read().decode("utf-8", errors="replace") + print(text or f"HTTP {exc.code}", file=sys.stderr) + return 1 + except OSError as exc: + print(f"request 
failed: {exc}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/integrations/codex/.codex-plugin/plugin.json b/integrations/codex/.codex-plugin/plugin.json new file mode 100644 index 000000000..239451f7b --- /dev/null +++ b/integrations/codex/.codex-plugin/plugin.json @@ -0,0 +1,22 @@ +{ + "name": "odysseus", + "version": "0.1.1", + "description": "Connect Codex to a scoped Odysseus instance.", + "author": { + "name": "Odysseus" + }, + "skills": "./skills/", + "interface": { + "displayName": "Odysseus", + "shortDescription": "Use scoped Odysseus tools from Codex.", + "longDescription": "Connects Codex terminal sessions to Odysseus through user-controlled scoped API tokens. Codex must use /api/codex/* endpoints so Odysseus Settings can enforce tool access.", + "developerName": "Odysseus", + "category": "Productivity", + "capabilities": [ + "todos", + "email", + "scoped-api" + ], + "defaultPrompt": "Use Odysseus only through configured scoped access. Check capabilities before reading or writing data." + } +} diff --git a/integrations/codex/README.md b/integrations/codex/README.md new file mode 100644 index 000000000..fff4e84e5 --- /dev/null +++ b/integrations/codex/README.md @@ -0,0 +1,51 @@ +# Odysseus Codex Integration + +This directory contains the Codex plugin/skill bundle for Odysseus. + +## User Flow + +1. Open Odysseus Settings > Integrations. +2. Add a Codex Agent. +3. Copy the full setup commands shown after the generated token. +4. Toggle the tools Codex is allowed to use. +5. 
Configure the terminal Codex session: + +```bash +export ODYSSEUS_URL=http://your-odysseus-host:7000 +export ODYSSEUS_API_TOKEN=ody_generated_token +mkdir -p ~/plugins +curl -fsSL -H "Authorization: Bearer $ODYSSEUS_API_TOKEN" "$ODYSSEUS_URL/api/codex/plugin.zip" -o /tmp/odysseus-codex-plugin.zip +python3 -m zipfile -e /tmp/odysseus-codex-plugin.zip ~/plugins +python3 - <<'PY' +import json +from pathlib import Path + +p = Path.home() / ".agents" / "plugins" / "marketplace.json" +p.parent.mkdir(parents=True, exist_ok=True) +if p.exists(): + data = json.loads(p.read_text()) +else: + data = {"name": "personal", "interface": {"displayName": "Personal"}, "plugins": []} + +data.setdefault("name", "personal") +data.setdefault("interface", {}).setdefault("displayName", "Personal") +plugins = data.setdefault("plugins", []) +entry = { + "name": "odysseus", + "source": {"source": "local", "path": "./plugins/odysseus"}, + "policy": {"installation": "AVAILABLE", "authentication": "ON_INSTALL"}, + "category": "Productivity", +} +data["plugins"] = [item for item in plugins if item.get("name") != "odysseus"] + [entry] +p.write_text(json.dumps(data, indent=2) + "\n") +PY +codex plugin add odysseus@personal +``` + +6. Verify: + +```bash +python3 ~/plugins/odysseus/scripts/odysseus_api.py capabilities +``` + +Codex must use `/api/codex/*` endpoints. SSH, Docker, direct Python imports, database queries, and MCP internals bypass Odysseus Settings and must not be used for user data access. 
diff --git a/integrations/codex/scripts/odysseus_api.py b/integrations/codex/scripts/odysseus_api.py new file mode 100755 index 000000000..fcef8a777 --- /dev/null +++ b/integrations/codex/scripts/odysseus_api.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +"""Small Odysseus scoped API helper for Codex terminal sessions.""" + +from __future__ import annotations + +import json +import os +import sys +import urllib.error +import urllib.request + + +def _usage() -> int: + print("usage:", file=sys.stderr) + print(" odysseus_api.py capabilities", file=sys.stderr) + print(" odysseus_api.py todos list", file=sys.stderr) + print(" odysseus_api.py todos add TITLE", file=sys.stderr) + print(" odysseus_api.py emails list [limit]", file=sys.stderr) + print(" odysseus_api.py emails read UID", file=sys.stderr) + print(" odysseus_api.py cookbook tasks", file=sys.stderr) + print(" odysseus_api.py cookbook servers", file=sys.stderr) + print(" odysseus_api.py cookbook cached [HOST]", file=sys.stderr) + print(" odysseus_api.py cookbook presets", file=sys.stderr) + print(" odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr) + print(" odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr) + print(" odysseus_api.py cookbook preset NAME", file=sys.stderr) + print(" odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr) + print(" odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr) + print(" odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr) + return 2 + + +def _config() -> tuple[str, str] | None: + base_url = os.environ.get("ODYSSEUS_URL", "").strip().rstrip("/") + token = os.environ.get("ODYSSEUS_API_TOKEN", "").strip() + missing = [] + if not base_url: + missing.append("ODYSSEUS_URL") + if not token: + missing.append("ODYSSEUS_API_TOKEN") + if missing: + print(f"missing {', '.join(missing)}; create a Codex Agent token in Odysseus Settings", file=sys.stderr) + return None + return base_url, token + 
+ +def main() -> int: + if len(sys.argv) < 2: + return _usage() + + command = sys.argv[1].lower() + if command == "capabilities": + method = "GET" + path = "/api/codex/capabilities" + body = None + elif command == "todos": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + path = "/api/codex/todos" + if action == "list": + method = "GET" + body = None + elif action == "add" and len(sys.argv) >= 4: + method = "POST" + body = json.dumps({"action": "add", "title": " ".join(sys.argv[3:])}) + else: + return _usage() + elif command == "emails": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + if action == "list": + method = "GET" + limit = sys.argv[3] if len(sys.argv) >= 4 else "10" + path = f"/api/codex/emails?folder=INBOX&limit={limit}&offset=0&filter=all" + body = None + elif action == "read" and len(sys.argv) >= 4: + method = "GET" + path = f"/api/codex/emails/{sys.argv[3]}" + body = None + else: + return _usage() + elif command == "cookbook": + if len(sys.argv) < 3: + return _usage() + action = sys.argv[2].lower() + if action == "tasks": + method = "GET" + path = "/api/codex/cookbook/tasks" + body = None + elif action == "servers": + method = "GET" + path = "/api/codex/cookbook/servers" + body = None + elif action == "output" and len(sys.argv) >= 4: + method = "GET" + sid = sys.argv[3] + tail = sys.argv[4] if len(sys.argv) >= 5 else "400" + path = f"/api/codex/cookbook/output/{sid}?tail={tail}" + body = None + elif action == "cached": + method = "GET" + if len(sys.argv) >= 4: + from urllib.parse import quote + path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}" + else: + path = "/api/codex/cookbook/cached" + body = None + elif action == "presets": + method = "GET" + path = "/api/codex/cookbook/presets" + body = None + elif action == "preset" and len(sys.argv) >= 4: + from urllib.parse import quote + method = "POST" + path = f"/api/codex/cookbook/preset/{quote(sys.argv[3])}" + body = None + elif action == 
"adopt" and len(sys.argv) >= 5: + method = "POST" + path = "/api/codex/cookbook/adopt" + payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]} + if len(sys.argv) >= 6: payload["host"] = sys.argv[5] + if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6]) + body = json.dumps(payload) + elif action == "serve" and len(sys.argv) >= 5: + method = "POST" + path = "/api/codex/cookbook/serve" + payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]} + if len(sys.argv) >= 6: + payload["remote_host"] = sys.argv[5] + body = json.dumps(payload) + elif action == "stop" and len(sys.argv) >= 4: + method = "POST" + path = f"/api/codex/cookbook/stop/{sys.argv[3]}" + body = None + else: + return _usage() + else: + if len(sys.argv) < 3: + return _usage() + method = sys.argv[1].upper() + path = sys.argv[2] + body = sys.argv[3] if len(sys.argv) > 3 else None + + if not path.startswith("/"): + path = "/" + path + if not path.startswith("/api/codex/"): + print("refusing non-/api/codex path; use scoped Odysseus integration endpoints only", file=sys.stderr) + return 2 + + config = _config() + if config is None: + return 2 + base_url, token = config + + data = None + headers = { + "Accept": "application/json", + "Authorization": f"Bearer {token}", + } + if body is not None: + try: + parsed = json.loads(body) + except json.JSONDecodeError as exc: + print(f"invalid json body: {exc}", file=sys.stderr) + return 2 + data = json.dumps(parsed).encode("utf-8") + headers["Content-Type"] = "application/json" + + req = urllib.request.Request(base_url + path, data=data, headers=headers, method=method) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + print(resp.read().decode("utf-8")) + return 0 + except urllib.error.HTTPError as exc: + text = exc.read().decode("utf-8", errors="replace") + print(text or f"HTTP {exc.code}", file=sys.stderr) + return 1 + except OSError as exc: + print(f"request failed: {exc}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + raise 
SystemExit(main()) diff --git a/integrations/codex/skills/odysseus/SKILL.md b/integrations/codex/skills/odysseus/SKILL.md new file mode 100644 index 000000000..4cff1402e --- /dev/null +++ b/integrations/codex/skills/odysseus/SKILL.md @@ -0,0 +1,141 @@ +--- +name: odysseus +description: Use when the user asks Codex to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Codex Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN. +--- + +# Odysseus + +Use this skill when a user asks to interact with Odysseus from Codex. + +## Configuration + +Expect these environment variables: + +- `ODYSSEUS_URL`: Base URL for the user's Odysseus instance, for example `http://127.0.0.1:7000`. +- `ODYSSEUS_API_TOKEN`: Scoped API token created in Odysseus Settings > Integrations > Add Integration > Codex Agent. + +If either value is missing, do not guess credentials. Tell the user to create a Codex Agent token in Odysseus Settings and expose both values to the terminal session. + +## When to use what + +- **Reminder ("remind me at 5pm to do X")** → TODO with `due_date`. The due_date IS the reminder — it fires a notification automatically via the user's configured channel (browser/email/ntfy). **Do NOT create a calendar event for a reminder.** Creating a calendar event named "Reminder" does NOT trigger a notification — it's just a time block on the calendar. +- **Calendar event ("meeting at 3pm", "dentist Tuesday 10am")** → calendar event. Use for scheduled time blocks, meetings, appointments, recurring schedules. These show up on the calendar grid; reminders for them are configured separately in Odysseus settings. +- **Note / freeform info ("note that the wifi password is ...")** → memory or todo without a due_date (depending on whether it's a fact about the user or an action item). +- **Persistent fact / preference about the user** → memory. 
+ +If the user says "reminder" + a time, default to TODO with due_date. Only switch to calendar if the user explicitly says "calendar", "event", "meeting", "appointment", or describes a time *range*. + +## Safety + +- All Odysseus data access MUST go through the scoped HTTP API under `/api/codex/*`. +- Check `/api/codex/capabilities` before using a tool surface. +- Treat `403` as an intentional Settings restriction. Do not work around it. +- Do not use SSH, Docker, direct Python imports, SQLite queries, MCP internals, browser cookies, or local files to read/write Odysseus user data. +- Do not call helpers like `do_manage_notes`, email MCP internals, or database sessions directly for user data, even if shell access exists. +- Never send email directly unless the user explicitly asks to send and the token has a send-capable scope. +- Keep actions scoped to the token owner. + +## Todos + +The Codex API supports todos/checklists: + +- `GET /api/codex/todos` +- `POST /api/codex/todos` + +Use the bundled helper script when available: + +```bash +python3 integrations/codex/scripts/odysseus_api.py capabilities +python3 integrations/codex/scripts/odysseus_api.py todos list +python3 integrations/codex/scripts/odysseus_api.py todos add "Follow up" +``` + +Supported todo actions are `list`, `add`, `update`, `delete`, and `toggle_item`. + +**Reminders (todos with a due date)** — the backend parses natural language. Send `due_date` in the body via the generic POST so the time becomes a structured reminder, NOT a literal substring inside the title. The `todos add TITLE` shortcut only sets the title, so use the POST form for anything with a time: + +```bash +python3 integrations/codex/scripts/odysseus_api.py POST /api/codex/todos '{"action":"add","title":"Call dentist","due_date":"tomorrow at 5pm"}' +``` + +The backend accepts both ISO timestamps and natural language like `"tomorrow 5pm"`, `"next Monday 9am"`, `"in 2 hours"`. It anchors to the user's timezone. 
+ +## Email + +The Codex API supports scoped email reads: + +- `GET /api/codex/emails?folder=INBOX&limit=10&offset=0&filter=all` +- `GET /api/codex/emails/{uid}?folder=INBOX` + +Use the bundled helper script when available: + +```bash +python3 integrations/codex/scripts/odysseus_api.py emails list 5 +python3 integrations/codex/scripts/odysseus_api.py emails read UID +``` + +If `/api/codex/capabilities` does not show `email.read: true`, do not inspect email. Ask the user to enable Email read in the Codex Agent settings. + +## Memory + +- `GET /api/codex/memory` — list memories for the token owner. +- `POST /api/codex/memory` — body `{"text": "...", "category": "fact", "source": "user", "session_id": null}`. Requires `memory:write`. +- `DELETE /api/codex/memory/{memory_id}` — remove a memory entry. Requires `memory:write`. + +```bash +python3 integrations/codex/scripts/odysseus_api.py GET /api/codex/memory +python3 integrations/codex/scripts/odysseus_api.py POST /api/codex/memory '{"text":"User prefers SI units","category":"preference"}' +``` + +## Calendar + +- `GET /api/codex/calendar/events?start=ISO&end=ISO` — list events in window. +- `POST /api/codex/calendar/events` — body matches `EventCreate` (`summary`, `dtstart`, `dtend`, `all_day`, `description`, `location`, `calendar_href`, `rrule`, `color`). Requires `calendar:write`. +- `DELETE /api/codex/calendar/events/{uid}` — delete event by uid (the value returned in the POST response). Requires `calendar:write`. + +## Documents + +- `GET /api/codex/documents?search=...&limit=50` — paginated library. +- `GET /api/codex/documents/{doc_id}` — fetch one document. +- `POST /api/codex/documents` — body `{"session_id": "...", "title": "...", "content": "...", "language": "markdown"}`. Requires `documents:write`. +- `DELETE /api/codex/documents/{doc_id}` — delete a document. Requires `documents:write`. 
+ +## Email draft + send + +- `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`). +- `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction. + +## Cookbook serve (debug a failing model launch) + +The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.). + +- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`. +- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`. +- `GET /api/codex/cookbook/cached?host=` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`. +- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`. +- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt + neofetch banner overwrites the pane. Default tail=400. Requires `cookbook:read`. +- `POST /api/codex/cookbook/serve` — launch a serve task. 
Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`. +- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session. Requires `cookbook:launch`. + +```bash +python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook tasks +python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400 +python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345 +python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook serve \ + /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \ + "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \ + pewds@192.168.1.12 +``` + +**Debug loop pattern:** `tasks` → `output SID 600` (find root cause; request larger `tail` if it references "above") → `stop SID` → `serve repo "new cmd"` → wait ~20s → `output` on the new sessionId. 
+ +**Hard limits this surface enforces:** +- `cookbook serve` cmd allowlist + shell-metacharacter rejection. +- `cookbook stop` requires sessionIds matching `[a-zA-Z0-9_-]+`. +- Agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching. + +## Forbidden Bypass Pattern + +If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Codex Agent tool toggle instead. diff --git a/launch-windows.ps1 b/launch-windows.ps1 index 827bfdcb4..88ede8d66 100644 --- a/launch-windows.ps1 +++ b/launch-windows.ps1 @@ -30,23 +30,80 @@ function Fail($msg) { exit 1 } -# 1. Locate a Python interpreter (3.11+ recommended) +function Find-GitBash { + $cmd = Get-Command bash -ErrorAction SilentlyContinue + if ($cmd) { return $cmd.Source } + + $roots = @() + foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) { + $base = [Environment]::GetEnvironmentVariable($name) + if ($base) { $roots += (Join-Path $base "Git") } + } + $roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git") + + foreach ($root in ($roots | Select-Object -Unique)) { + foreach ($relative in @("bin\bash.exe", "usr\bin\bash.exe")) { + $candidate = Join-Path $root $relative + if (Test-Path $candidate) { return $candidate } + } + } + return $null +} + +# 1. 
Locate a Python interpreter (3.11+ required) Write-Step "Checking for Python" +function Get-PythonVersionText($launcher, $launcherArgs) { + try { + return (& $launcher @launcherArgs -c "import sys; print('.'.join(map(str, sys.version_info[:3])))" 2>$null).Trim() + } catch { + return $null + } +} + $pyExe = $null -foreach ($c in @("python", "py")) { - $cmd = Get-Command $c -ErrorAction SilentlyContinue - if ($cmd) { $pyExe = $cmd.Source; break } +$pyArgs = @() +$pyVersion = $null + +$pyLauncher = Get-Command py -ErrorAction SilentlyContinue +if ($pyLauncher) { + foreach ($v in @("-3.13", "-3.12", "-3.11")) { + $ver = Get-PythonVersionText $pyLauncher.Source @($v) + if ($ver) { + $pyExe = $pyLauncher.Source + $pyArgs = @($v) + $pyVersion = $ver + break + } + } } + if (-not $pyExe) { - Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script." + $pythonCmd = Get-Command python -ErrorAction SilentlyContinue + if ($pythonCmd) { + $ver = Get-PythonVersionText $pythonCmd.Source @() + if ($ver) { + $versionParts = $ver.Split('.') + $major = [int]$versionParts[0] + $minor = [int]$versionParts[1] + if ($major -gt 3 -or ($major -eq 3 -and $minor -ge 11)) { + $pyExe = $pythonCmd.Source + $pyVersion = $ver + } + } + } } -Write-Host ("Using Python: " + $pyExe) + +if (-not $pyExe) { + Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script." +} +$pythonLabel = ("Using Python {0}: {1} {2}" -f $pyVersion, $pyExe, ($pyArgs -join ' ')).TrimEnd() +Write-Host $pythonLabel # 2. 
Create the virtualenv if missing $venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe" if (-not (Test-Path $venvPy)) { Write-Step "Creating virtual environment (venv)" - & $pyExe -m venv venv + & $pyExe @pyArgs -m venv venv if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." } } else { Write-Host "venv already exists - skipping creation." @@ -64,7 +121,7 @@ Write-Step "Running first-time setup" if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." } # 5. Friendly note about Git Bash (full Cookbook / agent-shell parity) -if (-not (Get-Command bash -ErrorAction SilentlyContinue)) { +if (-not (Find-GitBash)) { Write-Host "" Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow Write-Host " The core app works without it. For full Cookbook background" -ForegroundColor Yellow diff --git a/mcp_servers/_common.py b/mcp_servers/_common.py deleted file mode 100644 index 641c8522d..000000000 --- a/mcp_servers/_common.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -_common.py - -Shared constants and helpers for built-in MCP servers. -""" - -MAX_OUTPUT_CHARS = 10_000 -MAX_READ_CHARS = 20_000 -SHELL_TIMEOUT = 60 -PYTHON_TIMEOUT = 30 -SEARCH_TIMEOUT = 30 - - -def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: - """Truncate text to *limit* characters with a suffix note.""" - if len(text) > limit: - return text[:limit] + f"\n... 
(truncated, {len(text)} chars total)" - return text diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index bde4307fe..d1c2ac07e 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -31,13 +31,19 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) server = Server("email") EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20")) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" +from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR +DATA_DIR = Path(_DATA_DIR) def _b(value) -> bytes: return str(value).encode() +def _q(name: str) -> str: + """Quote an IMAP mailbox name for commands that take mailbox args.""" + return '"' + (name or "").replace("\\", "\\\\").replace('"', '\\"') + '"' + + def _uid_fetch_rows(data) -> list: return [d for d in (data or []) if isinstance(d, bytes) and b"UID " in d] @@ -58,7 +64,7 @@ def _clean_header_value(value) -> str: def _db_path() -> Path: - return DATA_DIR / "app.db" + return Path(APP_DB) def _list_accounts_raw() -> list: @@ -70,10 +76,12 @@ def _list_accounts_raw() -> list: try: conn = sqlite3.connect(str(path)) conn.row_factory = sqlite3.Row - rows = conn.execute(""" + columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()} + smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security" + rows = conn.execute(f""" SELECT id, name, is_default, enabled, imap_host, imap_port, imap_user, imap_password, imap_starttls, - smtp_host, smtp_port, smtp_user, smtp_password, from_address + smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address FROM email_accounts WHERE enabled = 1 ORDER BY is_default DESC, created_at ASC """).fetchall() @@ -145,6 +153,7 @@ def _load_config(account: str | None = None) -> dict: "imap_starttls": os.environ.get("IMAP_STARTTLS", "true").lower() == "true", 
"smtp_host": os.environ.get("SMTP_HOST", ""), "smtp_port": int(os.environ.get("SMTP_PORT", "465")), + "smtp_security": os.environ.get("SMTP_SECURITY", ""), "smtp_user": os.environ.get("SMTP_USER", ""), "smtp_password": os.environ.get("SMTP_PASSWORD", ""), "smtp_starttls": os.environ.get("SMTP_STARTTLS", "false").lower() == "true", @@ -154,7 +163,7 @@ def _load_config(account: str | None = None) -> dict: "trash_folder": os.environ.get("TRASH_FOLDER", "Trash"), "cache_db": os.environ.get( "EMAIL_CACHE_DB", - str(DATA_DIR / "email_cache.db"), + EMAIL_CACHE_DB, ), "account_id": None, "account_name": None, @@ -189,13 +198,14 @@ def _load_config(account: str | None = None) -> dict: cfg["imap_ssl"] = int(cfg["imap_port"]) == 993 and not cfg["imap_starttls"] cfg["smtp_host"] = row["smtp_host"] or cfg["smtp_host"] cfg["smtp_port"] = int(row["smtp_port"] or cfg["smtp_port"]) + cfg["smtp_security"] = row["smtp_security"] or cfg["smtp_security"] or ("starttls" if int(cfg["smtp_port"]) == 587 else "ssl") cfg["smtp_user"] = row["smtp_user"] or cfg["smtp_user"] cfg["smtp_password"] = _decrypt(row["smtp_password"]) if row["smtp_password"] else cfg["smtp_password"] cfg["from_address"] = row["from_address"] or row["imap_user"] or cfg["from_address"] else: # Legacy fallback: settings.json flat keys try: - settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json" + settings_path = Path(_SETTINGS_FILE) if settings_path.exists(): settings = json.loads(settings_path.read_text(encoding="utf-8")) for key in ( @@ -235,10 +245,27 @@ def _imap_connect(account: str | None = None): timeout=EMAIL_SOCKET_TIMEOUT, ) if cfg["imap_starttls"]: - conn.starttls() + try: + conn.starttls() + except Exception: + # Don't leak the open plain socket on a rejected STARTTLS. 
def _decode_header(raw):
    """Decode a MIME (RFC 2047) encoded header into a plain string.

    Args:
        raw: Header value as returned by ``msg.get(...)``; falsy values
            (``None``, ``""``) yield ``""``.

    Returns:
        The decoded text. The function never raises: it degrades from a
        standards-compliant decode, to a lossy per-part decode, to the raw
        value rendered with ``str()``.
    """
    if not raw:
        return ""
    try:
        # make_header concatenates per RFC 2047: no spurious space between an
        # encoded-word and adjacent plain text, and whitespace between two
        # adjacent encoded-words is dropped.
        return str(email.header.make_header(email.header.decode_header(raw)))
    except Exception:
        pass

    try:
        parts = email.header.decode_header(raw)
    except Exception:
        # decode_header itself rejected the value (e.g. HeaderParseError on
        # broken base64), so calling it again cannot succeed. Return the raw
        # text so the message is still listed rather than dropped.
        return str(raw)

    # Unknown charset or similar: lossy per-part decode.
    decoded = []
    for data, charset in parts:
        if isinstance(data, bytes):
            try:
                decoded.append(data.decode(charset or "utf-8", errors="replace"))
            except LookupError:
                decoded.append(data.decode("utf-8", errors="replace"))
        else:
            decoded.append(data)
    return "".join(decoded)
""" - conn = _imap_connect(account) - select_status, _ = conn.select(folder, readonly=True) - if select_status != "OK": - conn.logout() - raise ValueError(f"IMAP folder not found: {folder}") + conn = None + try: + conn = _imap_connect(account) + select_status, _ = conn.select(_q(folder), readonly=True) + if select_status != "OK": + raise ValueError(f"IMAP folder not found: {folder}") - if unread_only and unresponded_only: - status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)") - elif unread_only: - status, data = conn.uid("SEARCH", None, "(UNSEEN)") - else: - # Include read too — IMAP search "ALL" returns the entire folder - status, data = conn.uid("SEARCH", None, "ALL") + if unread_only and unresponded_only: + status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)") + elif unread_only: + status, data = conn.uid("SEARCH", None, "(UNSEEN)") + elif unresponded_only: + # Was missing — unresponded_only=True (without unread_only) fell through + # to "ALL" and returned answered mail too, despite the documented + # "emails without replies" behaviour. 
+ status, data = conn.uid("SEARCH", None, "(UNANSWERED)") + else: + # Include read too — IMAP search "ALL" returns the entire folder + status, data = conn.uid("SEARCH", None, "ALL") - if status != "OK" or not data[0]: - conn.logout() - return [] + if status != "OK" or not data[0]: + return [] - uid_list = list(reversed(data[0].split()))[:max_results] - cache = _get_cached_summaries() - results = [] + uid_list = list(reversed(data[0].split()))[:max_results] + cache = _get_cached_summaries() + results = [] - for uid in uid_list: - try: - status, msg_data = conn.uid("FETCH", uid, "(RFC822.HEADER)") - if status != "OK": + for uid in uid_list: + try: + status, msg_data = conn.uid("FETCH", uid, "(RFC822.HEADER)") + if status != "OK": + continue + raw_header = msg_data[0][1] + msg = email.message_from_bytes(raw_header) + + subject = _decode_header(msg.get("Subject", "(no subject)")) + sender = _decode_header(msg.get("From", "unknown")) + date_str = msg.get("Date", "") + message_id = msg.get("Message-ID", "") + + # Parse sender name + sender_name, sender_addr = email.utils.parseaddr(sender) + sender_display = sender_name or sender_addr + + # Check cache for summary + cached = cache.get(subject, {}) + summary = cached.get("summary", "") + + results.append({ + "uid": uid.decode(), + "message_id": message_id, + "subject": subject, + "from": sender_display, + "from_address": sender_addr, + "date": date_str, + "summary": summary, + }) + except Exception: continue - raw_header = msg_data[0][1] - msg = email.message_from_bytes(raw_header) - subject = _decode_header(msg.get("Subject", "(no subject)")) - sender = _decode_header(msg.get("From", "unknown")) - date_str = msg.get("Date", "") - message_id = msg.get("Message-ID", "") - - # Parse sender name - sender_name, sender_addr = email.utils.parseaddr(sender) - sender_display = sender_name or sender_addr - - # Check cache for summary - cached = cache.get(subject, {}) - summary = cached.get("summary", "") - - results.append({ - 
"uid": uid.decode(), - "message_id": message_id, - "subject": subject, - "from": sender_display, - "from_address": sender_addr, - "date": date_str, - "summary": summary, - }) - except Exception: - continue - - conn.logout() - return results + return results + finally: + if conn: + try: conn.logout() + except Exception: pass def _result_sort_time(result: dict) -> datetime: @@ -522,7 +568,7 @@ def _search_emails(query, folders=None, max_results=20, account=None): try: for folder in folders: try: - status, _ = conn.select(folder, readonly=True) + status, _ = conn.select(_q(folder), readonly=True) if status != "OK": continue status, data = conn.uid("SEARCH", None, search_cmd) @@ -632,54 +678,55 @@ def _extract_attachment_to_disk(msg, index, target_dir): def _read_email(uid=None, message_id=None, folder="INBOX", account=None): """Read full email content by UID or message-ID. account = mailbox selector.""" cfg = _load_config(account) - conn = _imap_connect(account) - conn.select(folder, readonly=True) + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) - if message_id and not uid: - status, data = conn.uid("SEARCH", None, f'(HEADER Message-ID "{message_id}")') - if status != "OK" or not data[0]: - conn.logout() - return {"error": f"Email not found with Message-ID: {message_id}"} - uid = data[0].split()[-1] + if message_id and not uid: + status, data = conn.uid("SEARCH", None, f'(HEADER Message-ID "{message_id}")') + if status != "OK" or not data[0]: + return {"error": f"Email not found with Message-ID: {message_id}"} + uid = data[0].split()[-1] - if not uid: - conn.logout() - return {"error": "No UID or Message-ID provided"} + if not uid: + return {"error": "No UID or Message-ID provided"} - status, msg_data = conn.uid("FETCH", _b(uid), "(RFC822)") - if status != "OK": - conn.logout() - return {"error": f"Failed to fetch email UID {uid}"} - if not msg_data or not msg_data[0] or not isinstance(msg_data[0], tuple) or 
len(msg_data[0]) < 2: - conn.logout() - return {"error": f"Email not found with UID {uid}"} + status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + if status != "OK": + return {"error": f"Failed to fetch email UID {uid}"} + if not msg_data or not msg_data[0] or not isinstance(msg_data[0], tuple) or len(msg_data[0]) < 2: + return {"error": f"Email not found with UID {uid}"} - raw = msg_data[0][1] - msg = email.message_from_bytes(raw) + raw = msg_data[0][1] + msg = email.message_from_bytes(raw) - subject = _decode_header(msg.get("Subject", "(no subject)")) - sender = _decode_header(msg.get("From", "unknown")) - date_str = msg.get("Date", "") - message_id_header = msg.get("Message-ID", "") - body = _extract_text(msg) - attachments = _list_attachments_from_msg(msg) + subject = _decode_header(msg.get("Subject", "(no subject)")) + sender = _decode_header(msg.get("From", "unknown")) + date_str = msg.get("Date", "") + message_id_header = msg.get("Message-ID", "") + body = _extract_text(msg) + attachments = _list_attachments_from_msg(msg) - sender_name, sender_addr = email.utils.parseaddr(sender) + sender_name, sender_addr = email.utils.parseaddr(sender) - conn.logout() - return { - "uid": uid.decode() if isinstance(uid, bytes) else str(uid), - "account": cfg.get("account_name") or cfg.get("imap_user") or "default", - "account_email": cfg.get("imap_user") or cfg.get("from_address") or "", - "account_id": cfg.get("account_id"), - "message_id": message_id_header, - "subject": subject, - "from": sender_name or sender_addr, - "from_address": sender_addr, - "date": date_str, - "body": body[:8000], - "attachments": attachments, - } + return { + "uid": uid.decode() if isinstance(uid, bytes) else str(uid), + "account": cfg.get("account_name") or cfg.get("imap_user") or "default", + "account_email": cfg.get("imap_user") or cfg.get("from_address") or "", + "account_id": cfg.get("account_id"), + "message_id": message_id_header, + "subject": subject, + "from": sender_name or 
sender_addr, + "from_address": sender_addr, + "date": date_str, + "body": body[:8000], + "attachments": attachments, + } + finally: + if conn: + try: conn.logout() + except Exception: pass def _read_email_across_accounts(uid=None, message_id=None, folder="INBOX"): @@ -739,17 +786,26 @@ def _smtp_connect(account=None, cfg=None): if not _smtp_ready(cfg): raise ValueError(f"Email account {cfg.get('account_name') or account or 'default'} has no SMTP configured") port = int(cfg.get("smtp_port") or 465) - # Account rows only store host/port, not the legacy env-level smtp_ssl - # toggle. Infer the conventional TLS mode from the port so MCP tools match - # the web send path: 465 = implicit SSL, 587 = STARTTLS. - if port == 587: + security = str(cfg.get("smtp_security") or "").strip().lower() + if security not in {"ssl", "starttls", "none"}: + security = "starttls" if port == 587 else "ssl" + if security == "starttls": conn = smtplib.SMTP( cfg["smtp_host"], port, timeout=EMAIL_SOCKET_TIMEOUT, ) - conn.starttls() - elif cfg.get("smtp_ssl", True): + try: + conn.starttls() + except Exception: + # Don't leak the open plain socket on a rejected STARTTLS. SMTP has + # no shutdown(); close() is the low-level socket close (no QUIT). (#3174) + try: + conn.close() + except Exception: + pass + raise + elif security == "ssl": conn = smtplib.SMTP_SSL( cfg["smtp_host"], port, @@ -761,10 +817,17 @@ def _smtp_connect(account=None, cfg=None): port, timeout=EMAIL_SOCKET_TIMEOUT, ) - if cfg["smtp_starttls"]: - conn.starttls() if cfg["smtp_user"] and cfg["smtp_password"]: - conn.login(cfg["smtp_user"], cfg["smtp_password"]) + try: + conn.login(cfg["smtp_user"], cfg["smtp_password"]) + except Exception: + # A failed login otherwise orphans the connected socket; close it + # before propagating (SMTP has no shutdown(); close() = socket close). 
(#3174) + try: + conn.close() + except Exception: + pass + raise return conn @@ -809,7 +872,7 @@ def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, b imap = _imap_connect(send_account) try: sent_folder = _detect_sent_folder(imap) - append_st, append_data = imap.append(sent_folder, "\\Seen", None, msg.as_bytes()) + append_st, append_data = imap.append(_q(sent_folder), "\\Seen", None, msg.as_bytes()) if append_st == "OK" and append_data: m = re.search(rb"APPENDUID\s+\d+\s+(\d+)", append_data[0] or b"") if m: @@ -835,10 +898,15 @@ def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, b def _reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None): """Reply to an existing email by UID. Threads via In-Reply-To/References.""" - conn = _imap_connect(account) - conn.select(folder, readonly=True) - status, msg_data = conn.uid("FETCH", _b(uid), "(RFC822)") - conn.logout() + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) + status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + finally: + if conn: + try: conn.logout() + except Exception: pass if status != "OK" or not msg_data or not msg_data[0]: return {"error": f"Failed to fetch email UID {uid}"} raw = msg_data[0][1] @@ -878,7 +946,7 @@ def _reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None): def _set_flag(uid, folder, flag, add=True, account=None): """Add or remove an IMAP flag (e.g. 
\\Seen, \\Answered, \\Deleted).""" conn = _imap_connect(account) - conn.select(folder) + conn.select(_q(folder)) op = "+FLAGS" if add else "-FLAGS" try: status, data = conn.uid("STORE", _b(uid), op, flag) @@ -900,7 +968,7 @@ def _bulk_set_flag(uids, folder, flag, add=True, account=None): conn = _imap_connect(account) touched = [] try: - conn.select(folder) + conn.select(_q(folder)) op = "+FLAGS" if add else "-FLAGS" msg_set = ",".join(str(u) for u in uids) try: @@ -927,7 +995,7 @@ def _bulk_move(uids, source_folder, dest_folder, account=None, role: str = ""): conn = _imap_connect(account) moved = 0 try: - conn.select(source_folder) + conn.select(_q(source_folder)) dest_folder = _resolve_folder(conn, dest_folder, role or _folder_role_from_name(dest_folder)) msg_set = ",".join(str(u) for u in uids) try: @@ -938,10 +1006,11 @@ def _bulk_move(uids, source_folder, dest_folder, account=None, role: str = ""): if not existing: return 0 moved = len(existing) - status, _ = conn.uid("MOVE", _b(msg_set), dest_folder) + dest_arg = _q(dest_folder) + status, _ = conn.uid("MOVE", _b(msg_set), dest_arg) if status != "OK": # Fallback: UID copy + flag-delete + expunge - status, _ = conn.uid("COPY", _b(msg_set), dest_folder) + status, _ = conn.uid("COPY", _b(msg_set), dest_arg) if status != "OK": return 0 status, _ = conn.uid("STORE", _b(msg_set), "+FLAGS", "\\Deleted") @@ -958,7 +1027,7 @@ def _search_uids(folder="INBOX", criteria="UNSEEN", account=None): ALL, ANSWERED). Used to resolve selectors like all_unread → uids.""" conn = _imap_connect(account) try: - conn.select(folder, readonly=True) + conn.select(_q(folder), readonly=True) status, data = conn.uid("SEARCH", None, criteria) if status != "OK" or not data or not data[0]: return [] @@ -970,7 +1039,7 @@ def _search_uids(folder="INBOX", criteria="UNSEEN", account=None): def _move_message(uid, source_folder, dest_folder, account=None, role: str = ""): """Move a message between folders. 
Tries IMAP MOVE, falls back to copy+delete.""" conn = _imap_connect(account) - conn.select(source_folder) + conn.select(_q(source_folder)) try: dest_folder = _resolve_folder(conn, dest_folder, role or _folder_role_from_name(dest_folder)) try: @@ -980,11 +1049,12 @@ def _move_message(uid, source_folder, dest_folder, account=None, role: str = "") existing = _uid_fetch_rows(data) if status != "OK" or not existing: return False - status, _ = conn.uid("MOVE", _b(uid), dest_folder) + dest_arg = _q(dest_folder) + status, _ = conn.uid("MOVE", _b(uid), dest_arg) if status == "OK": return True # Fallback: UID copy + delete - status, _ = conn.uid("COPY", _b(uid), dest_folder) + status, _ = conn.uid("COPY", _b(uid), dest_arg) if status != "OK": return False status, _ = conn.uid("STORE", _b(uid), "+FLAGS", "\\Deleted") @@ -1013,16 +1083,21 @@ def _archive_email(uid, folder="INBOX", account=None): def _download_attachment(uid, index, folder="INBOX", account=None): """Extract a specific attachment to disk and return its local path.""" - conn = _imap_connect(account) - conn.select(folder, readonly=True) - status, msg_data = conn.uid("FETCH", _b(uid), "(RFC822)") - conn.logout() + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) + status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + finally: + if conn: + try: conn.logout() + except Exception: pass if status != "OK": return {"error": f"Failed to fetch email UID {uid}"} raw = msg_data[0][1] msg = email.message_from_bytes(raw) - target_dir = DATA_DIR / "mail-attachments" / f"{folder}_{uid}" + target_dir = Path(MAIL_ATTACHMENTS_DIR) / f"{folder}_{uid}" filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} diff --git a/mcp_servers/image_gen_server.py b/mcp_servers/image_gen_server.py index 872ccd681..0c8d3884a 100644 --- a/mcp_servers/image_gen_server.py +++ b/mcp_servers/image_gen_server.py @@ -16,6 
+16,8 @@ from mcp.types import Tool, TextContent sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from src.constants import GENERATED_IMAGES_DIR + server = Server("image_gen") @@ -115,14 +117,18 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: img = images[0] image_url = None + # Prefix the instance's public base URL (existing app_public_url setting) so the + # link is fully-qualified and clickable when the model echoes it. Empty = relative + # same-origin path (unchanged default). + _pub_base = (get_setting("app_public_url", "") or "").rstrip("/") if img.get("b64_json"): - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) filename = f"{uuid.uuid4().hex[:12]}.png" img_path = img_dir / filename img_path.write_bytes(base64.b64decode(img["b64_json"])) - image_url = f"/api/generated-image/{filename}" + image_url = f"{_pub_base}/api/generated-image/{filename}" # Save to gallery try: @@ -146,7 +152,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: else: return [TextContent(type="text", text="Error: Unexpected image API response format")] - result = f"Generated image for: {prompt[:100]}\nimage_url: {image_url}\nmodel: {model_id}\nsize: {size}" + # "Direct link:" rather than an "image_url:" label — small models copied the + # label token ("image_url") into the link href, producing a broken link. 
+ result = ( + f"Generated image for: {prompt[:100]}\n" + f"Direct link: {image_url}\n" + f"model: {model_id}\nsize: {size}" + ) return [TextContent(type="text", text=result)] except httpx.TimeoutException: diff --git a/mcp_servers/memory_server.py b/mcp_servers/memory_server.py index c2812e1c0..1f226ad1d 100644 --- a/mcp_servers/memory_server.py +++ b/mcp_servers/memory_server.py @@ -161,10 +161,9 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: deleted_text = m.get("text", "") deleted_category = m.get("category", "") break - original_len = len(memories) - memories = [m for m in memories if not m.get("id", "").startswith(memory_id)] - if len(memories) == original_len: + if not full_id: return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")] + memories = [m for m in memories if m.get("id") != full_id] _memory_manager.save(memories) if _memory_vector and _memory_vector.healthy and full_id: try: diff --git a/mcp_servers/rag_server.py b/mcp_servers/rag_server.py index 2d50b4b4f..71aa1b60b 100644 --- a/mcp_servers/rag_server.py +++ b/mcp_servers/rag_server.py @@ -101,10 +101,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: return [TextContent(type="text", text=f"Error: {e}")] elif action == "add_directory": - directory = arguments.get("directory", "").strip() + _dir = arguments.get("directory") + directory = _dir.strip() if isinstance(_dir, str) else "" if not directory: return [TextContent(type="text", text="Error: add_directory needs a directory path")] - directory = os.path.expanduser(directory) + # Store an absolute path so indexed `source` metadata is absolute and + # remove_directory (which abspath-normalizes) can match it later (#1660). 
+ directory = os.path.abspath(os.path.expanduser(directory)) if not os.path.isdir(directory): return [TextContent(type="text", text=f"Error: Directory not found: {directory}")] if not _rag_manager: @@ -112,14 +115,27 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: try: result = _rag_manager.index_personal_documents(directory) indexed = result.get("indexed_count", 0) if isinstance(result, dict) else 0 + # Record the directory so `list` and `remove_directory` can see it. + # Indexing was just done above, so pass index=False to avoid a second + # (ownerless) pass. Without this the directory was indexed but never + # tracked in indexed_directories, so it was invisible/unremovable. + if _personal_docs_manager and hasattr(_personal_docs_manager, "add_directory"): + try: + _personal_docs_manager.add_directory(directory, index=False) + except Exception: + pass return [TextContent(type="text", text=f"Directory '{directory}' added to RAG index ({indexed} chunks indexed)")] except Exception as e: return [TextContent(type="text", text=f"Error: Failed to index directory: {e}")] elif action == "remove_directory": - directory = arguments.get("directory", "").strip() + _dir = arguments.get("directory") + directory = _dir.strip() if isinstance(_dir, str) else "" if not directory: return [TextContent(type="text", text="Error: remove_directory needs a directory path")] + # Expand ~ to match add_directory, which indexes the expanded path. + # Without this, removing "~/docs" never matches the stored absolute path. 
+ directory = os.path.expanduser(directory) if not _personal_docs_manager: return [TextContent(type="text", text="Error: Personal docs manager not available")] try: diff --git a/odysseus-ui.service b/odysseus-ui.service index fea436398..835c8cc5a 100644 --- a/odysseus-ui.service +++ b/odysseus-ui.service @@ -9,7 +9,7 @@ Type=simple # CHANGE THESE to match your user and install path: User=YOURUSER WorkingDirectory=/home/YOURUSER/odysseus-ui -ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 8000 --host 0.0.0.0 +ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 7000 --host 0.0.0.0 Restart=always RestartSec=3 EnvironmentFile=-/home/YOURUSER/odysseus-ui/.env diff --git a/package-lock.json b/package-lock.json index 80eac7ebf..8e0812dd9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "odysseus-ui", + "name": "odysseus", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/pyproject.toml b/pyproject.toml index 116b1376c..58161958f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,18 @@ [tool.pytest.ini_options] testpaths = ["tests"] asyncio_mode = "auto" +# Test-taxonomy markers added at collection time by tests/conftest.py. The +# stable area_* markers are declared here; the dynamic sub_ +# markers are registered before collection by pytest_configure in +# tests/conftest.py, so unknown-mark warnings still flag genuine typos outside +# the taxonomy. See tests/_taxonomy.py and tests/README.md. 
+markers = [ + "area_security: tests covering auth, owner-scope, SSRF, XSS, confinement, redaction", + "area_routes: tests covering HTTP route / API behavior", + "area_services: tests covering service-layer behavior (llm, cookbook, email, calendar, ...)", + "area_cli: tests covering CLI / script behavior", + "area_js: JavaScript / Node-backed tests", + "area_helpers: self-tests for the shared test helpers in tests/helpers/", + "area_unit: pure parser / utility tests that do not clearly belong elsewhere", + "area_uncategorized: tests not yet matched by the taxonomy (fallback)", +] diff --git a/requirements-optional.txt b/requirements-optional.txt index 72d9f7e69..eeb57c151 100644 --- a/requirements-optional.txt +++ b/requirements-optional.txt @@ -4,6 +4,14 @@ # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic # memory, and tool selection are core paths, so they ship by default now. +# Local speech-to-text (microphone -> text) via faster-whisper, for the +# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no +# torch needed). Install if you want to dictate/transcribe with the mic +# without sending audio to an external endpoint. +# Optional extra: install `torch` too if you have a CUDA GPU and want +# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise. +faster-whisper + # DuckDuckGo as a search provider option. # Install if you want DDG in the search-provider dropdown. # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE. @@ -15,3 +23,14 @@ duckduckgo-search # network-served app — see ACKNOWLEDGMENTS.md. The MIT core (PDF *text* # extraction via pypdf) works without it; this only unlocks form-filling. PyMuPDF + +# Office / EPUB document text extraction (chat attachments + the personal-docs +# RAG index). markitdown (MIT, Microsoft) converts .docx/.xlsx/.pptx/.xls/.epub +# to Markdown — more token-efficient and model-legible than a raw dump. 
Optional +# and lazy-imported via src/markitdown_runtime.py; without it those formats fall +# back to a friendly "install to extract" banner and the core stays pure-MIT. +# Extras pull mammoth/lxml/python-pptx/pandas/openpyxl/xlrd; the base also pulls +# magika (onnxruntime), already a core dep via fastembed. We avoid the +# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per +# the dependency-age discussion in issue #485. +markitdown[docx,pptx,xlsx,xls]==0.1.5 diff --git a/requirements.txt b/requirements.txt index e4630d17c..2c4072980 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,6 +21,10 @@ youtube-transcript-api # Markdown rendering for research reports (src/visual_report.py). # Imported at module-top so it's a hard core dep, not optional. markdown +# HTML sanitizer for rendered research reports (src/visual_report.py). Report +# content is untrusted (LLM output over crawled pages) and report pages run +# under a relaxed CSP, so the rendered HTML is allowlist-sanitized. +nh3 # Calendar .ics import/export (routes/calendar_routes.py). icalendar # Recurrence rule expansion for calendar events (routes/calendar_routes.py). diff --git a/routes/admin_wipe_routes.py b/routes/admin_wipe_routes.py index 668b02d92..212e2a768 100644 --- a/routes/admin_wipe_routes.py +++ b/routes/admin_wipe_routes.py @@ -27,10 +27,11 @@ from core.database import ( Document, DocumentVersion, GalleryImage, + GalleryAlbum, CalendarEvent, CalendarCal, ) -from src.constants import DATA_DIR +from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR logger = logging.getLogger(__name__) @@ -106,7 +107,7 @@ def setup_admin_wipe_routes(session_manager): # Skills live as SKILL.md files under data/skills/. Drop # the entire directory; the SkillsManager re-creates the # tree on next write. 
- skills_dir = os.path.join(DATA_DIR, "skills") + skills_dir = SKILLS_DIR count = 0 if os.path.isdir(skills_dir): # Count SKILL.md files for the response — quick walk. @@ -114,7 +115,7 @@ def setup_admin_wipe_routes(session_manager): count += sum(1 for f in files if f == "SKILL.md") _rmtree_quiet(skills_dir) # Legacy fallback file - legacy = os.path.join(DATA_DIR, "skills.json") + legacy = SKILLS_FILE if os.path.exists(legacy): try: os.remove(legacy) @@ -145,12 +146,13 @@ def setup_admin_wipe_routes(session_manager): return {"status": "deleted", "kind": kind, "count": count} if kind == "gallery": - count = db.query(GalleryImage).count() + count = db.query(GalleryImage).count() + db.query(GalleryAlbum).count() db.query(GalleryImage).delete() + db.query(GalleryAlbum).delete() db.commit() # Also drop the upload dir so disk doesn't keep orphans. - _rmtree_quiet(os.path.join(DATA_DIR, "gallery")) - _rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads")) + _rmtree_quiet(GALLERY_DIR) + _rmtree_quiet(GALLERY_UPLOADS_DIR) return {"status": "deleted", "kind": kind, "count": count} if kind == "calendar": diff --git a/routes/api_token_routes.py b/routes/api_token_routes.py index ba412a48f..97c576d15 100644 --- a/routes/api_token_routes.py +++ b/routes/api_token_routes.py @@ -12,6 +12,61 @@ from src.auth_helpers import get_current_user MAX_NAME_LEN = 100 DEFAULT_SCOPES = "chat" +ALLOWED_SCOPES = { + "chat", + "todos:read", + "todos:write", + "documents:read", + "documents:write", + "email:read", + "email:draft", + "email:send", + "calendar:read", + "calendar:write", + "memory:read", + "memory:write", +} +TOKEN_PROFILES = { + "chat": ["chat"], + "codex_todos": ["todos:read", "todos:write"], + "codex_email_drafts": ["email:read", "email:draft", "documents:read", "documents:write"], +} + + +def _normalize_scopes(scopes: str | list[str] | None = None, profile: str | None = None) -> list[str]: + profile = profile if isinstance(profile, str) else None + profile_key = (profile or 
"").strip() + if profile_key: + if profile_key not in TOKEN_PROFILES: + raise HTTPException(400, "Unknown token profile") + requested = list(TOKEN_PROFILES[profile_key]) + elif isinstance(scopes, list): + requested = [str(s).strip() for s in scopes if str(s).strip()] + elif isinstance(scopes, str) and scopes: + requested = [s.strip() for s in scopes.replace(" ", ",").split(",") if s.strip()] + else: + requested = [DEFAULT_SCOPES] + + normalized = [] + for scope in requested: + if scope not in ALLOWED_SCOPES: + raise HTTPException(400, f"Unknown token scope: {scope}") + if scope not in normalized: + normalized.append(scope) + + def ensure_before(write_scope: str, read_scope: str): + if write_scope not in normalized or read_scope in normalized: + return + idx = normalized.index(write_scope) + normalized.insert(idx, read_scope) + + ensure_before("todos:write", "todos:read") + ensure_before("documents:write", "documents:read") + ensure_before("calendar:write", "calendar:read") + ensure_before("memory:write", "memory:read") + ensure_before("email:draft", "email:read") + + return normalized or [DEFAULT_SCOPES] def setup_api_token_routes() -> APIRouter: @@ -45,13 +100,28 @@ def setup_api_token_routes() -> APIRouter: except Exception: pass + @router.get("/tokens/profiles") + def token_profiles(request: Request): + require_admin(request) + return { + "profiles": TOKEN_PROFILES, + "allowed_scopes": sorted(ALLOWED_SCOPES), + } + @router.post("/tokens") - def create_token(request: Request, name: str = Form("")): + def create_token( + request: Request, + name: str = Form(""), + scopes: str = Form(None), + profile: str = Form(None), + ): require_admin(request) name = name.strip()[:MAX_NAME_LEN] if not name: raise HTTPException(400, "Token name is required") owner = get_current_user(request) + scope_list = _normalize_scopes(scopes, profile) + scopes_value = ",".join(scope_list) raw_token = "ody_" + secrets.token_urlsafe(32) token_hash = bcrypt.hashpw(raw_token.encode(), 
bcrypt.gensalt()).decode() @@ -64,7 +134,7 @@ def setup_api_token_routes() -> APIRouter: name=name, token_hash=token_hash, token_prefix=raw_token[:8], - scopes=DEFAULT_SCOPES, + scopes=scopes_value, is_active=True, )) _invalidate_cache(request) @@ -75,9 +145,44 @@ def setup_api_token_routes() -> APIRouter: "owner": owner, "token": raw_token, "token_prefix": raw_token[:8], - "scopes": DEFAULT_SCOPES.split(","), + "scopes": scope_list, } + @router.patch("/tokens/{token_id}") + async def update_token(request: Request, token_id: str): + require_admin(request) + try: + payload = await request.json() + except Exception: + payload = {} + with get_db_session() as db: + token = db.query(ApiToken).filter(ApiToken.id == token_id).first() + if not token: + raise HTTPException(404, "Token not found") + if isinstance(payload.get("name"), str) and payload["name"].strip(): + token.name = payload["name"].strip()[:MAX_NAME_LEN] + # Only touch scopes when the caller actually sent them. A partial + # update such as a rename ({"name": ...} with no "scopes" key) must + # not silently reset the token to the default scope — that dropped + # every previously granted scope. 
+ if "scopes" in payload: + token.scopes = ",".join(_normalize_scopes(payload.get("scopes"))) + db.add(token) + current_scopes = [ + s.strip() + for s in (getattr(token, "scopes", "") or DEFAULT_SCOPES).split(",") + if s.strip() + ] + response = { + "id": token_id, + "name": getattr(token, "name", ""), + "owner": getattr(token, "owner", None), + "token_prefix": getattr(token, "token_prefix", ""), + "scopes": current_scopes, + } + _invalidate_cache(request) + return response + @router.delete("/tokens/{token_id}") def delete_token(request: Request, token_id: str): require_admin(request) diff --git a/routes/auth_routes.py b/routes/auth_routes.py index dca14c32e..9379bced8 100644 --- a/routes/auth_routes.py +++ b/routes/auth_routes.py @@ -3,11 +3,13 @@ from fastapi import APIRouter, Request, Response, HTTPException from pydantic import BaseModel from typing import Optional +import asyncio import logging import os from core.auth import AuthManager from src.rate_limiter import RateLimiter +from src.settings_scrub import scrub_settings from src.settings import ( load_settings as _load_settings, save_settings as _save_settings, @@ -21,6 +23,7 @@ from src.integrations import ( update_integration, delete_integration, get_integration, + mask_integration_secret, execute_api_call, INTEGRATION_PRESETS, migrate_from_settings, @@ -64,6 +67,8 @@ class DeleteUserRequest(BaseModel): class RenameUserRequest(BaseModel): username: str +class SetOpenRegistrationRequest(BaseModel): + enabled: bool SESSION_COOKIE = "odysseus_session" @@ -88,7 +93,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(400, "Already configured") if len(body.password) < 8: raise HTTPException(400, "Password must be at least 8 characters") - ok = auth_manager.setup(body.username, body.password) + ok = await asyncio.to_thread(auth_manager.setup, body.username, body.password) if not ok: raise HTTPException(500, "Setup failed") return {"ok": True, "message": "Admin account 
created"} @@ -106,7 +111,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(400, "Password must be at least 8 characters") if len(body.username.strip()) < 1: raise HTTPException(400, "Username is required") - ok = auth_manager.create_user(body.username, body.password, is_admin=False) + ok = await asyncio.to_thread(auth_manager.create_user, body.username, body.password, is_admin=False) if not ok: raise HTTPException(409, "Username already taken") return {"ok": True, "message": "Account created"} @@ -117,7 +122,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(429, "Too many requests — try again later") # Verify password first username = body.username.strip().lower() - if not auth_manager.verify_password(username, body.password): + if not await asyncio.to_thread(auth_manager.verify_password, username, body.password): raise HTTPException(401, "Invalid credentials") # Check 2FA if enabled if auth_manager.totp_enabled(username): @@ -126,10 +131,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: return {"ok": False, "requires_totp": True, "username": username} if not auth_manager.totp_verify(username, body.totp_code): raise HTTPException(401, "Invalid 2FA code") - # All checks passed — create session - token = auth_manager.create_session(username, body.password) - if not token: - raise HTTPException(401, "Invalid credentials") + # All checks passed — create session (password already verified above) + token = await asyncio.to_thread(auth_manager.create_session_trusted, username) cookie_kwargs = dict( key=SESSION_COOKIE, value=token, @@ -175,9 +178,11 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(401, "Not authenticated") if len(body.new_password) < 8: raise HTTPException(400, "Password must be at least 8 characters") - ok = auth_manager.change_password(user, body.current_password, body.new_password) + current_token = 
request.cookies.get(SESSION_COOKIE) + ok = await asyncio.to_thread(auth_manager.change_password, user, body.current_password, body.new_password) if not ok: raise HTTPException(400, "Current password is incorrect") + await asyncio.to_thread(auth_manager.revoke_user_sessions, user, current_token) return {"ok": True} # ------------------------------------------------------------------ @@ -290,6 +295,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: # owner-scoped DB rows before changing auth so the account keeps # access to its sessions, docs, email accounts, tasks, etc. try: + from sqlalchemy import func from core.database import Base, SessionLocal db = SessionLocal() try: @@ -299,7 +305,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: continue ( db.query(model) - .filter(model.owner == old_username) + .filter(func.lower(model.owner) == old_username) .update({"owner": new_username}, synchronize_session=False) ) db.commit() @@ -317,26 +323,56 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs prefs = _load_prefs() users = prefs.get("_users") if isinstance(prefs, dict) else None - if isinstance(users, dict) and old_username in users and new_username not in users: - users[new_username] = users.pop(old_username) - _save_prefs(prefs) + if isinstance(users, dict): + prefs_key = next( + (k for k in users if str(k).strip().lower() == old_username), + None, + ) + new_taken = any(str(k).strip().lower() == new_username for k in users) + if prefs_key is not None and not new_taken: + users[new_username] = users.pop(prefs_key) + _save_prefs(prefs) except Exception as e: logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e) ok = auth_manager.rename_user(old_username, new_username, user) if not ok: raise HTTPException(400, "Cannot rename user") + # The owner-rename loop above updated ApiToken.owner in the DB, but the + # 
bearer-token cache still maps each token to the OLD owner. Without + # refreshing it, the renamed user's API tokens resolve to the old (now + # non-existent) owner and stop reaching their data until the cache next + # goes dirty. Invalidate it now, like the token CRUD routes do. + invalidator = getattr(request.app.state, "invalidate_token_cache", None) + if callable(invalidator): + invalidator() return {"ok": True, "username": new_username, "renamed_self": old_username == user} - @router.post("/signup-toggle") + @router.post("/signup-toggle", deprecated=True) async def toggle_signup(request: Request): - """Toggle open registration on/off. Admin only.""" + """ + Toggle open registration on/off. Admin only. + + DEPRECATED: This endpoint uses toggle semantics which can lead to unsafe state changes. + Use PUT /open-signup instead. + + This endpoint is kept for backward compatibility and may be removed in future versions. + """ user = _get_current_user(request) if not user or not auth_manager.is_admin(user): raise HTTPException(403, "Admin only") auth_manager.signup_enabled = not auth_manager.signup_enabled return {"ok": True, "signup_enabled": auth_manager.signup_enabled} + @router.put("/open-signup") + async def set_signup_enabled(body: SetOpenRegistrationRequest, request: Request): + """Set open signup enabled state. 
Admin only.""" + user = _get_current_user(request) + if not user or not auth_manager.is_admin(user): + raise HTTPException(403, "Admin only") + auth_manager.signup_enabled = body.enabled + return {"ok": True,"signup_enabled": auth_manager.signup_enabled} + @router.delete("/users") async def admin_delete_user(body: DeleteUserRequest, request: Request): user = _get_current_user(request) @@ -345,6 +381,17 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: ok = auth_manager.delete_user(body.username, user) if not ok: raise HTTPException(400, "Cannot delete user") + # delete_user removes the user's ApiToken rows, but the bearer-auth + # middleware serves from an in-memory prefix->token cache that only + # rebuilds when flagged dirty. Without this, a deleted user's already + # cached token keeps authenticating until some other token op or a + # restart clears the cache. Mirror what the token routes do. + try: + invalidator = getattr(request.app.state, "invalidate_token_cache", None) + if invalidator: + invalidator() + except Exception: + pass return {"ok": True} # ---- Feature visibility (admin-managed) ---- @@ -370,29 +417,6 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: # ---- App settings (admin-managed) ---- - _SECRET_KEY_PATTERNS = ("_api_key", "_password", "_secret", "_token", "_key") - - def _is_secret_key(name: str) -> bool: - n = (name or "").lower() - if n in ("google_pse_cx",): # public identifier, not a secret - return False - return any(n.endswith(p) or n == p.lstrip("_") for p in _SECRET_KEY_PATTERNS) - - def _scrub_settings(settings: dict) -> dict: - """Return a copy of settings with secret-shaped values masked. - - Frontend reads /settings without auth for things like keybinds + TTS - prefs. Secrets (search-provider keys, IMAP/SMTP passwords) must NOT - be exposed to non-admin callers. 
- """ - scrubbed = {} - for k, v in (settings or {}).items(): - if _is_secret_key(k) and isinstance(v, str) and v: - scrubbed[k] = "" # presence preserved, value blanked - else: - scrubbed[k] = v - return scrubbed - @router.get("/settings") async def get_settings(request: Request): """Returns app settings. Admins get the full set; non-admins get @@ -402,7 +426,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: settings = _load_settings() if user and auth_manager.is_admin(user): return settings - return _scrub_settings(settings) + return scrub_settings(settings) @router.post("/settings") async def set_settings(request: Request): @@ -412,9 +436,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(403, "Admin only") body = await request.json() current = _load_settings() + # Per-key validation for numeric settings: coerce to int and clamp to a + # sane range so a bad value can't disable the agent or let it run away. + _INT_RANGES = { + "agent_max_rounds": (1, 200), + "agent_max_tool_calls": (0, 1000), # 0 = unlimited + } for key in DEFAULT_SETTINGS: - if key in body: - current[key] = body[key] + if key not in body: + continue + val = body[key] + if key in _INT_RANGES: + lo, hi = _INT_RANGES[key] + try: + val = int(val) + except (TypeError, ValueError): + raise HTTPException(400, f"{key} must be an integer") + val = max(lo, min(val, hi)) + current[key] = val _save_settings(current) return current @@ -431,12 +470,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(403, "Admin only") items = load_integrations() # Mask API keys for frontend display - safe = [] - for item in items: - copy = dict(item) - if copy.get("api_key"): - copy["api_key"] = copy["api_key"][:4] + "****" - safe.append(copy) + safe = [mask_integration_secret(item) for item in items] return {"integrations": safe} @router.get("/integrations/presets") @@ -452,7 +486,7 @@ def setup_auth_routes(auth_manager: AuthManager) 
-> APIRouter: raise HTTPException(403, "Admin only") body = await request.json() item = add_integration(body) - return {"ok": True, "integration": item} + return {"ok": True, "integration": mask_integration_secret(item)} @router.put("/integrations/{integration_id}") async def update_integration_route(integration_id: str, request: Request): @@ -464,7 +498,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: item = update_integration(integration_id, body) if not item: raise HTTPException(404, "Integration not found") - return {"ok": True, "integration": item} + return {"ok": True, "integration": mask_integration_secret(item)} @router.delete("/integrations/{integration_id}") async def delete_integration_route(integration_id: str, request: Request): @@ -549,6 +583,27 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: hint = " If this is Docker Compose ntfy, set NTFY_BIND to that host/Tailscale IP and NTFY_BASE_URL to the same server URL in .env, then recreate ntfy." 
return {"ok": False, "message": f"ntfy publish to {full_url} failed: {e}.{hint}"[:500]} + if preset == "discord_webhook": + import httpx + webhook_url = (integ.get("base_url") or "").strip() + if not webhook_url: + return {"ok": False, "message": "No webhook URL set — paste the full Discord webhook URL into the Base URL field."} + payload = { + "embeds": [{ + "title": "Odysseus connectivity test", + "description": "If you see this, your Discord Webhook integration is wired up correctly.", + "color": 5793266, + }] + } + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.post(webhook_url, json=payload) + if r.is_success: + return {"ok": True, "message": "Test embed sent — check your Discord channel to confirm it arrived."} + return {"ok": False, "message": f"Discord returned HTTP {r.status_code}: {r.text[:200]}"} + except Exception as e: + return {"ok": False, "message": f"Request failed: {e}"[:400]} + # All other presets: GET against a known health endpoint. # Fall back to detecting from name if preset is missing. health_paths = { diff --git a/routes/backup_routes.py b/routes/backup_routes.py index b165fcce7..5ca403f81 100644 --- a/routes/backup_routes.py +++ b/routes/backup_routes.py @@ -77,7 +77,12 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo # ── Memories ── if "memories" in body and isinstance(body["memories"], list): existing = memory_manager.load_all() - existing_texts = {e.get("text", "").strip().lower() for e in existing} + # Dedup against THIS user's own memories only. Using every tenant's + # rows (load_all) meant a memory whose text matched any other + # user's was silently skipped, so the importing user lost their own + # data. The full store is still saved back below. 
+ existing_texts = {e.get("text", "").strip().lower() + for e in existing if e.get("owner") == user} added = 0 for mem in body["memories"]: if not isinstance(mem, dict) or not mem.get("text"): @@ -96,24 +101,68 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo # ── Skills ── if "skills" in body and isinstance(body["skills"], list): existing = skills_manager.load_all() - existing_ids = {s.get("id") for s in existing} - existing_titles = {s.get("title", "").strip().lower() for s in existing} + existing_names = {s.get("name") for s in existing if s.get("name")} + existing_ids = {s.get("id") for s in existing if s.get("id")} + existing_titles = { + (s.get("title") or s.get("description") or "").strip().lower() + for s in existing + } added = 0 for skill in body["skills"]: - if not isinstance(skill, dict) or not skill.get("title"): + if not isinstance(skill, dict): continue - # Skip if same id or same title already exists - if skill.get("id") in existing_ids: + title = ( + skill.get("title") or skill.get("description") + or skill.get("name") or "" + ).strip() + if not title: continue - if skill["title"].strip().lower() in existing_titles: + sid = skill.get("id") or skill.get("name") + if sid and sid in existing_ids: continue - if user and not skill.get("owner"): - skill["owner"] = user - existing.append(skill) - existing_ids.add(skill.get("id")) - existing_titles.add(skill["title"].strip().lower()) + nm = skill.get("name") + if nm and nm in existing_names: + continue + if title.lower() in existing_titles: + continue + owner = skill.get("owner") + if user and not owner: + owner = user + # Skills live on disk as SKILL.md files; the old JSON-era + # skills_manager.save() no longer exists. Write each new skill + # via add_skill (source="user" skips auto-dedup — this is an + # explicit backup restore). 
+ result = skills_manager.add_skill( + title=title, + name=skill.get("name"), + description=skill.get("description"), + problem=skill.get("problem", ""), + solution=skill.get("solution", ""), + steps=skill.get("steps"), + tags=skill.get("tags"), + source="user", + teacher_model=skill.get("teacher_model"), + confidence=skill.get("confidence", 0.8), + owner=owner, + category=skill.get("category", "general"), + when_to_use=skill.get("when_to_use"), + procedure=skill.get("procedure"), + pitfalls=skill.get("pitfalls"), + verification=skill.get("verification"), + platforms=skill.get("platforms"), + requires_toolsets=skill.get("requires_toolsets"), + fallback_for_toolsets=skill.get("fallback_for_toolsets"), + status=skill.get("status", "draft"), + version=skill.get("version", "1.0.0"), + ) + if result.get("_deduped"): + continue + if result.get("name"): + existing_names.add(result["name"]) + if result.get("id"): + existing_ids.add(result["id"]) + existing_titles.add(title.lower()) added += 1 - skills_manager.save(existing) imported.append(f"{added} skills") # ── Presets ── diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py index 3c767f233..345280528 100644 --- a/routes/calendar_routes.py +++ b/routes/calendar_routes.py @@ -1,21 +1,39 @@ """Calendar routes — local SQLite-backed calendar CRUD.""" import logging +import re import uuid from datetime import datetime, date, timedelta -from typing import Optional, List, Tuple +from typing import Optional, List from fastapi import APIRouter, HTTPException, Request, UploadFile, File from pydantic import BaseModel from sqlalchemy import or_, and_ -from dateutil.rrule import rrulestr, rruleset -from dateutil.rrule import DAILY, WEEKLY, MONTHLY, YEARLY +from dateutil.rrule import rrulestr from core.database import SessionLocal, CalendarCal, CalendarEvent -from src.auth_helpers import get_current_user +from src.auth_helpers import require_user +from src.upload_limits import read_upload_limited, ICS_MAX_BYTES logger = 
logging.getLogger(__name__) + +def _ics_naive_dtstart(dt): + """Naive value matching how import_ics STORES CalendarEvent.dtstart. + + Timed tz-aware events are stored as UTC with tzinfo stripped, all-day + dates as midnight datetimes, naive datetimes unchanged. The ICS dedup + must compute the same value or a re-import never matches the stored row. + """ + if isinstance(dt, datetime): + if dt.tzinfo is not None: + from datetime import timezone as _tz + return dt.astimezone(_tz.utc).replace(tzinfo=None) + return dt + if isinstance(dt, date): + return datetime(dt.year, dt.month, dt.day) + return dt + # Single-user fallback identity. Used only when: # 1. The app is configured for single-user (no auth middleware), AND # 2. The request didn't resolve to an authenticated user. @@ -28,16 +46,17 @@ _SINGLE_USER_MODE = _os.environ.get("ODYSSEUS_SINGLE_USER", "1") != "0" def _require_user(request: Request) -> str: - """Return the authenticated user. In multi-user mode an unauthenticated - request raises 401; in single-user mode it falls through to - FALLBACK_OWNER. Prevents the silent cross-user data write that would - happen if a request slipped past auth middleware in a real deployment.""" - u = get_current_user(request) - if u: - return u - if _SINGLE_USER_MODE: - return FALLBACK_OWNER - raise HTTPException(401, "Authentication required") + """Return the authenticated user. Uses require_user so AUTH_ENABLED=false + and single-user mode both work: require_user returns "" when auth is + disabled or unconfigured, and only raises 401 when auth is configured but + the caller is unauthenticated. Falls back to FALLBACK_OWNER for calendar + writes so data isn't stored under an empty owner in single-user mode.""" + user = require_user(request) + if user: + return user + # require_user returned "" — auth is off or unconfigured (single-user). + # Use FALLBACK_OWNER so calendar rows have a stable owner for filtering. 
+ return FALLBACK_OWNER def _get_or_404_calendar(db, cal_id: str, owner: str) -> CalendarCal: @@ -64,6 +83,33 @@ def _get_or_404_event(db, uid: str, owner: str) -> CalendarEvent: return ev +def _ics_escape(text: str) -> str: + """Escape a value for an iCalendar TEXT field (RFC 5545 §3.3.11). + + Backslash, semicolon and comma are structural in TEXT values and must be + escaped, and newlines become a literal ``\\n``. Backslash is escaped first + so the escapes we add aren't re-escaped. + """ + return ( + (text or "") + .replace("\\", "\\\\") + .replace(";", "\\;") + .replace(",", "\\,") + .replace("\r\n", "\\n") + .replace("\n", "\\n") + .replace("\r", "\\n") + ) + + +def _safe_ics_filename(name: str) -> str: + """Return a conservative .ics filename safe for Content-Disposition.""" + stem = name if isinstance(name, str) else "" + stem = re.sub(r"[^A-Za-z0-9._-]", "_", stem).strip("._-") + if not stem: + stem = "calendar" + return f"{stem[:128]}.ics" + + def _resolve_base_uid(uid: str) -> str: """Extract the base series UID from a compound occurrence UID. @@ -125,26 +171,18 @@ def _ensure_default_calendar(db, owner: str = None) -> CalendarCal: return cal -# Per-request user UTC offset (in minutes east of UTC). chat_routes sets this -# from the `X-Tz-Offset` header so naive natural-language times the LLM -# emits ("today at 9pm") are parsed in the USER's timezone, not the server's -# clock. None = unknown, fall back to legacy server-local behavior. -from contextvars import ContextVar -_USER_TZ_OFFSET_MIN: ContextVar = ContextVar("user_tz_offset_min", default=None) - - -def set_user_tz_offset(offset_min): - """Set the current user's UTC offset for this async context.""" - try: - v = int(offset_min) - except (TypeError, ValueError): - return - _USER_TZ_OFFSET_MIN.set(v) - - -def get_user_tz_offset(): - """Read the current user's UTC offset (minutes east of UTC), or None.""" - return _USER_TZ_OFFSET_MIN.get() +# Per-request user time context. 
chat_routes sets this from browser timezone +# headers so natural-language times the LLM emits ("today at 9pm") are parsed +# in the user's timezone, not the server's clock. None = unknown, fall back to +# legacy server-local behavior. +from src.user_time import ( + get_user_tz_name, + get_user_tz_offset, + now_user_local, + set_user_tz_name, + set_user_tz_offset, + user_timezone, +) def parse_due_for_user(s: str) -> str: @@ -163,6 +201,7 @@ def parse_due_for_user(s: str) -> str: """ from datetime import timezone as _tz, timedelta as _td offset = get_user_tz_offset() + tz_name = get_user_tz_name() s = (s or "").strip() if not s: return s @@ -176,11 +215,11 @@ def parse_due_for_user(s: str) -> str: except ValueError: parsed = None - if offset is None: + if offset is None and not tz_name: # No user tz known — preserve legacy behavior (naive server-local). return _parse_dt(s).isoformat() - user_tz = _tz(_td(minutes=offset)) + user_tz = user_timezone() # Naive ISO → tag with user tz. if parsed is not None and parsed.tzinfo is None: @@ -188,7 +227,7 @@ def parse_due_for_user(s: str) -> str: # Natural language — evaluate against user's "now". server_now_utc = datetime.now(_tz.utc) - user_now = server_now_utc.astimezone(user_tz) + user_now = now_user_local(server_now_utc) # Patch datetime.now() inside _parse_dt by leveraging the user's clock: # we re-implement the small natural-language phrases here against user_now # so the result is naturally in the user's tz. 
@@ -196,6 +235,7 @@ def parse_due_for_user(s: str) -> str: lower = s.lower().strip() def _parse_time(t): + t = _re.sub(r'\b([ap])\s*\.?\s*m\.?\b', r'\1m', t.strip(), flags=_re.IGNORECASE) m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE) if not m: return None h = int(m.group(1)); mn = int(m.group(2) or 0); ampm = (m.group(3) or "").lower() @@ -218,6 +258,17 @@ def parse_due_for_user(s: str) -> str: if t is not None: return base.replace(hour=t[0], minute=t[1]).isoformat() + # Time-first: "3pm today", "11pm today", "9am tomorrow" + m = _re.match(r'^(.+?)\s+(today|tonight|tomorrow|tmrw|yesterday)$', lower) + if m: + time_part, word = m.group(1).strip(), m.group(2) + base = today + if word in ("tomorrow", "tmrw"): base = today + _td(days=1) + elif word == "yesterday": base = today - _td(days=1) + t = _parse_time(time_part) + if t is not None: + return base.replace(hour=t[0], minute=t[1]).isoformat() + m = _re.match(r'^in\s+(\d+)\s*(hour|hr|minute|min|day)s?\s*$', lower) if m: n = int(m.group(1)); unit = m.group(2) @@ -305,6 +356,7 @@ def _parse_dt(s: str) -> datetime: def _parse_time(t: str): """Return (hour, minute) from '1pm', '1:30 PM', '13:00', etc., or None.""" + t = _re.sub(r'\b([ap])\s*\.?\s*m\.?\b', r'\1m', t.strip(), flags=_re.IGNORECASE) m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE) if not m: return None @@ -319,8 +371,8 @@ def _parse_dt(s: str) -> datetime: return None return h, mn - # today/tomorrow/yesterday [at] TIME - m = _re.match(r'^(today|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower) + # today/tonight/tomorrow/yesterday [at] TIME + m = _re.match(r'^(today|tonight|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower) if m: word, rest = m.group(1), m.group(2).strip() base = today @@ -368,7 +420,17 @@ def _parse_dt(s: str) -> datetime: # Last resort: dateutil's fuzzy parser try: from dateutil import parser as _du - return _du.parse(s) + parsed = _du.parse(s) + # Strip tz like every other 
return path above — this function's + # contract is naive datetimes (CalendarEvent.dtstart is naive). An + # offset-bearing non-ISO input (e.g. RFC-2822 "Mon, 05 Jan 2026 + # 14:00:00 +0900") otherwise leaked tz-aware into the naive column and + # crashed read-back comparisons in _expand_rrule with "can't compare + # offset-naive and offset-aware datetimes". + if parsed.tzinfo is not None: + from datetime import timezone as _tz + return parsed.astimezone(_tz.utc).replace(tzinfo=None) + return parsed except Exception: raise ValueError(f"could not parse datetime: {s!r}") @@ -409,6 +471,9 @@ def _event_to_dict(ev: CalendarEvent) -> dict: # ── Recurrence expansion ── +_RRULE_EXPANSION_LIMIT = 1000 + + def _expand_rrule( ev: CalendarEvent, start: datetime, end: datetime ) -> List[dict]: @@ -431,11 +496,25 @@ def _expand_rrule( d = _event_to_dict(ev) d["is_recurrence"] = False d["series_uid"] = ev.uid + d["truncated"] = False return [d] # Parse the rrule, applying it to the base dtstart. + rrule_str = ev.rrule + if ev.dtstart is not None and getattr(ev.dtstart, "tzinfo", None) is None: + # Events are stored with a naive (UTC) dtstart, but standard .ics + # exporters (Google/Apple/Outlook/Fastmail) write the bound as an + # absolute UTC value, e.g. UNTIL=20240105T090000Z. dateutil refuses to + # mix a tz-aware UNTIL with a naive DTSTART ("RRULE UNTIL values must be + # specified in UTC when DTSTART is timezone-aware"), so the except branch + # below would silently collapse the whole series to a single event. + # Drop the trailing Z so UNTIL matches the naive DTSTART. 
+ import re as _re + rrule_str = _re.sub( + r"(UNTIL=\d{8}(?:T\d{6})?)Z", r"\1", rrule_str, flags=_re.IGNORECASE + ) try: - rule = rrulestr(ev.rrule, dtstart=ev.dtstart) + rule = rrulestr(rrule_str, dtstart=ev.dtstart) except Exception as ex: logger.warning( "Failed to parse rrule=%r for event %s: %s", ev.rrule, ev.uid, ex @@ -443,6 +522,7 @@ def _expand_rrule( d = _event_to_dict(ev) d["is_recurrence"] = False d["series_uid"] = ev.uid + d["truncated"] = False # Malformed RRULE rows are fetched by the recurring SQL branch # with only dtstart < end_dt — the base event may not actually # overlap the window. Only return if it does. @@ -455,22 +535,26 @@ def _expand_rrule( # (matching non-recurring overlap semantics: dtstart < end AND # dtend > start). expand_start = start - duration - occurrences = rule.between(expand_start, end, inc=True) - if not occurrences: - return [] - results = [] + truncated = False base = _event_to_dict(ev) - for occ_start in occurrences: + for occ_start in rule.xafter(expand_start, inc=True): + if occ_start >= end: + break + occ_end = occ_start + duration # Overlap filter: occurrence must intersect [start, end). # This enforces exclusive-end semantics (occ_start >= end is # excluded) and includes multi-day crossings (occ_end > start). 
- if occ_start >= end or occ_end <= start: + if occ_end <= start: continue + if len(results) >= _RRULE_EXPANSION_LIMIT: + truncated = True + break + # Build the compound uid: {base_uid}::{date} or ::{datetime} if ev.all_day: occ_uid = f"{ev.uid}::{occ_start.strftime('%Y-%m-%d')}" @@ -481,6 +565,7 @@ def _expand_rrule( d["uid"] = occ_uid d["series_uid"] = ev.uid d["is_recurrence"] = True + d["truncated"] = False if ev.all_day: d["dtstart"] = occ_start.strftime("%Y-%m-%d") @@ -493,6 +578,10 @@ def _expand_rrule( results.append(d) + if truncated: + for d in results: + d["truncated"] = True + return results @@ -501,57 +590,178 @@ def _expand_rrule( def setup_calendar_routes() -> APIRouter: router = APIRouter(prefix="/api/calendar", tags=["calendar"]) - # CalDAV connect form (Integrations → Calendar). Storage is local - # SQLite; sync (src/caldav_sync.py) pulls remote events into it on - # calendar open and periodically via the scheduler. + # ── CalDAV multi-account helpers ───────────────────────────────────────── + + def _get_caldav_accounts(owner: str) -> list: + from src.caldav_sync import _load_caldav_accounts + return _load_caldav_accounts(owner) + + def _save_caldav_accounts(owner: str, accounts: list) -> None: + from routes.prefs_routes import _load_for_user, _save_for_user + prefs = _load_for_user(owner) or {} + prefs["caldav_accounts"] = accounts + prefs.pop("caldav", None) + _save_for_user(owner, prefs) + + # ── CalDAV config routes (backward-compat single-account API) ──────────── + @router.get("/config") async def get_config(request: Request): + """Legacy single-account endpoint — returns the first configured account.""" owner = _require_user(request) - from routes.prefs_routes import _load_for_user - cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} - # Surface url+username but never hand the password back to the - # client — saved-state UI shouldn't leak the credential. 
+ accounts = _get_caldav_accounts(owner) + if not accounts: + return {"url": "", "username": "", "password": "", "has_password": False, "local": True} + first = accounts[0] + pw = first.get("password") or "" + has_pw = False + if pw: + try: + from src.secret_storage import decrypt + has_pw = bool(decrypt(pw)) + except Exception: + has_pw = bool(pw) return { - "url": cfg.get("url", "") or "", - "username": cfg.get("username", "") or "", + "url": first.get("url", "") or "", + "username": first.get("username", "") or "", "password": "", - "has_password": bool(cfg.get("password")), - "local": not bool(cfg.get("url")), + "has_password": has_pw, + "local": not bool(first.get("url")), } @router.post("/config") async def save_config(request: Request): + """Legacy single-account endpoint — upserts the first account.""" owner = _require_user(request) - from routes.prefs_routes import _load_for_user, _save_for_user try: body = await request.json() except Exception: body = {} - prefs = _load_for_user(owner) or {} - cfg = dict(prefs.get("caldav") or {}) - # Empty url => clear the whole entry (treat as "remove integration"). + accounts = _get_caldav_accounts(owner) if not (body.get("url") or "").strip(): - prefs.pop("caldav", None) - _save_for_user(owner, prefs) + _save_caldav_accounts(owner, []) return {"ok": True, "cleared": True} - cfg["url"] = body.get("url", "").strip() - cfg["username"] = (body.get("username") or "").strip() - # Preserve the stored password when the client sends an empty - # one (edit form re-submitted without re-typing the password). 
+ from src.caldav_sync import validate_caldav_url + try: + validated_url = validate_caldav_url(body.get("url", "")) + except ValueError as e: + raise HTTPException(400, str(e)) + if accounts: + acc = dict(accounts[0]) + else: + import uuid as _uuid + acc = {"id": str(_uuid.uuid4()), "label": "CalDAV"} + acc["url"] = validated_url + acc["username"] = (body.get("username") or "").strip() if body.get("password"): - cfg["password"] = body["password"] - prefs["caldav"] = cfg - _save_for_user(owner, prefs) + from src.secret_storage import encrypt + acc["password"] = encrypt(body["password"]) + new_accounts = [acc] + (accounts[1:] if len(accounts) > 1 else []) + _save_caldav_accounts(owner, new_accounts) + return {"ok": True} + + # ── CalDAV multi-account CRUD ───────────────────────────────────────────── + + @router.get("/config/accounts") + async def list_caldav_accounts(request: Request): + """Return all configured CalDAV accounts (passwords never returned).""" + owner = _require_user(request) + accounts = _get_caldav_accounts(owner) + safe = [] + for acc in accounts: + pw = acc.get("password") or "" + has_pw = False + if pw: + try: + from src.secret_storage import decrypt + has_pw = bool(decrypt(pw)) + except Exception: + has_pw = bool(pw) + safe.append({ + "id": acc.get("id", ""), + "label": acc.get("label", "") or acc.get("url", ""), + "url": acc.get("url", "") or "", + "username": acc.get("username", "") or "", + "has_password": has_pw, + }) + return {"accounts": safe} + + @router.post("/config/accounts") + async def add_caldav_account(request: Request): + """Add a new CalDAV account.""" + import uuid as _uuid + owner = _require_user(request) + try: + body = await request.json() + except Exception: + body = {} + from src.caldav_sync import validate_caldav_url + try: + url = validate_caldav_url(body.get("url", "")) + except ValueError as e: + raise HTTPException(400, str(e)) + if not body.get("password"): + raise HTTPException(400, "Password is required") + from 
src.secret_storage import encrypt + new_acc = { + "id": str(_uuid.uuid4()), + "label": (body.get("label") or "").strip() or "CalDAV", + "url": url, + "username": (body.get("username") or "").strip(), + "password": encrypt(body["password"]), + } + accounts = _get_caldav_accounts(owner) + accounts.append(new_acc) + _save_caldav_accounts(owner, accounts) + return {"ok": True, "id": new_acc["id"]} + + @router.put("/config/accounts/{account_id}") + async def update_caldav_account(account_id: str, request: Request): + """Update an existing CalDAV account by id.""" + owner = _require_user(request) + try: + body = await request.json() + except Exception: + body = {} + accounts = _get_caldav_accounts(owner) + idx = next((i for i, a in enumerate(accounts) if a.get("id") == account_id), None) + if idx is None: + raise HTTPException(404, "Account not found") + acc = dict(accounts[idx]) + if body.get("url"): + from src.caldav_sync import validate_caldav_url + try: + acc["url"] = validate_caldav_url(body["url"]) + except ValueError as e: + raise HTTPException(400, str(e)) + if body.get("label") is not None: + acc["label"] = (body.get("label") or "").strip() or "CalDAV" + if body.get("username") is not None: + acc["username"] = (body.get("username") or "").strip() + if body.get("password"): + from src.secret_storage import encrypt + acc["password"] = encrypt(body["password"]) + accounts[idx] = acc + _save_caldav_accounts(owner, accounts) + return {"ok": True} + + @router.delete("/config/accounts/{account_id}") + async def delete_caldav_account(account_id: str, request: Request): + """Remove a CalDAV account by id.""" + owner = _require_user(request) + accounts = _get_caldav_accounts(owner) + new_accounts = [a for a in accounts if a.get("id") != account_id] + if len(new_accounts) == len(accounts): + raise HTTPException(404, "Account not found") + _save_caldav_accounts(owner, new_accounts) return {"ok": True} @router.post("/test") async def test_connection(request: Request): - 
"""Actually probe the configured CalDAV server with a PROPFIND - request (the same handshake every CalDAV client uses). Accepts - an optional {url, username, password} body so the user can test - a configuration BEFORE saving it; falls back to the stored - creds otherwise. Returns {ok, error?} with a useful message on - failure (status code, auth issue, network error).""" + """Probe a CalDAV server with a PROPFIND. Accepts an optional body: + {url, username, password} to test before saving, or {account_id} to + test an already-saved account. Falls back to the first saved account + when nothing is provided.""" owner = _require_user(request) try: body = await request.json() @@ -561,14 +771,31 @@ def setup_calendar_routes() -> APIRouter: user = (body.get("username") or "").strip() pw = body.get("password") or "" if not (url and user and pw): - # Fall back to saved settings for this user. - from routes.prefs_routes import _load_for_user - cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} - url = url or (cfg.get("url") or "") - user = user or (cfg.get("username") or "") - pw = pw or (cfg.get("password") or "") + # Look up a saved account: by id if supplied, else first account. 
+ accounts = _get_caldav_accounts(owner) + acc = None + if body.get("account_id"): + acc = next((a for a in accounts if a.get("id") == body["account_id"]), None) + if acc is None and accounts: + acc = accounts[0] + if acc: + url = url or (acc.get("url") or "") + user = user or (acc.get("username") or "") + if not pw: + pw = acc.get("password") or "" + if pw: + try: + from src.secret_storage import decrypt + pw = decrypt(pw) + except Exception: + pass if not (url and user and pw): return {"ok": False, "error": "Missing URL, username, or password"} + from src.caldav_sync import validate_caldav_url + try: + url = validate_caldav_url(url) + except ValueError as e: + return {"ok": False, "error": str(e)} import httpx propfind_body = ( '\n' @@ -576,13 +803,25 @@ def setup_calendar_routes() -> APIRouter: '' ) try: - async with httpx.AsyncClient(timeout=8.0, follow_redirects=True) as cx: + async with httpx.AsyncClient(timeout=8.0, follow_redirects=False, trust_env=False) as cx: r = await cx.request( "PROPFIND", url, auth=(user, pw), headers={"Depth": "0", "Content-Type": "application/xml"}, content=propfind_body, ) + # If the server demands Digest (Baïkal default, SabreDAV-based + # servers, Radicale with htdigest), the Basic attempt above + # 401s. Retry once with httpx.DigestAuth so this test matches + # what the real sync does via caldav.DAVClient in + # src/caldav_sync.py (which negotiates the scheme). + if r.status_code == 401 and "digest" in r.headers.get("www-authenticate", "").lower(): + r = await cx.request( + "PROPFIND", url, + auth=httpx.DigestAuth(user, pw), + headers={"Depth": "0", "Content-Type": "application/xml"}, + content=propfind_body, + ) # 207 = Multi-Status — standard CalDAV success. 200 also # acceptable. Anything else (401/403/404/5xx) means trouble. 
if r.status_code in (200, 207): @@ -593,6 +832,8 @@ def setup_calendar_routes() -> APIRouter: return {"ok": False, "error": "Forbidden — user can't access that URL"} if r.status_code == 404: return {"ok": False, "error": "Not found — check the URL path"} + if 300 <= r.status_code < 400: + return {"ok": False, "error": "Redirects are not followed for CalDAV safety; use the final URL"} return {"ok": False, "error": f"HTTP {r.status_code}"} except httpx.ConnectError as e: return {"ok": False, "error": f"Connection refused: {e}"[:200]} @@ -610,6 +851,28 @@ def setup_calendar_routes() -> APIRouter: from src.caldav_sync import sync_caldav return await sync_caldav(owner) + @router.delete("/calendars/{cal_id}") + async def delete_calendar(cal_id: str, request: Request): + owner = _require_user(request) + db = SessionLocal() + try: + cal = db.query(CalendarCal).filter( + CalendarCal.id == cal_id, + CalendarCal.owner == owner, + ).first() + if not cal: + raise HTTPException(404, "Calendar not found") + db.delete(cal) + db.commit() + return {"ok": True} + except HTTPException: + raise + except Exception as e: + logger.error("Failed to delete calendar %s: %s", cal_id, e) + raise HTTPException(500, "Failed to delete calendar") + finally: + db.close() + @router.get("/calendars") async def list_calendars(request: Request): owner = _require_user(request) @@ -618,7 +881,7 @@ def setup_calendar_routes() -> APIRouter: _ensure_default_calendar(db, owner) cals = db.query(CalendarCal).filter(CalendarCal.owner == owner).all() return {"calendars": [ - {"name": c.name, "href": c.id, "color": c.color} + {"name": c.name, "href": c.id, "color": c.color, "source": c.source} for c in cals ]} except HTTPException: @@ -681,8 +944,12 @@ def setup_calendar_routes() -> APIRouter: expanded.extend(_expand_rrule(e, start_dt, end_dt)) # Sort by occurrence start time for consistent frontend ordering. 
+ truncated = any(e.get("truncated") for e in expanded) expanded.sort(key=lambda d: d["dtstart"]) - return {"events": expanded} + response: dict = {"events": expanded} + if truncated: + response["truncated"] = True + return response except HTTPException: raise except Exception as e: @@ -739,6 +1006,16 @@ def setup_calendar_routes() -> APIRouter: ) db.add(ev) db.commit() + if cal.source == "caldav": + # Push the new event to the remote so it appears on the user's + # other devices — the sync is otherwise pull-only (#800). + from src.caldav_writeback import writeback_event + await writeback_event(owner, cal.source, cal.id, { + "uid": uid, "summary": data.summary, "description": data.description, + "location": data.location, "dtstart": dtstart, "dtend": dtend, + "all_day": data.all_day, "is_utc": _is_utc and not data.all_day, + "rrule": data.rrule or "", + }) return {"ok": True, "uid": uid} except HTTPException: raise @@ -785,6 +1062,14 @@ def setup_calendar_routes() -> APIRouter: if data.color is not None: ev.color = data.color if data.color else None db.commit() + cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first() + if cal and cal.source == "caldav": + from src.caldav_writeback import writeback_event + await writeback_event(owner, cal.source, cal.id, { + "uid": ev.uid, "summary": ev.summary, "description": ev.description, + "location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend, + "all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "", + }) return {"ok": True} except HTTPException: raise @@ -805,8 +1090,15 @@ def setup_calendar_routes() -> APIRouter: db = SessionLocal() try: ev = _get_or_404_event(db, base_uid, owner) + # Capture what the remote push needs BEFORE the row is gone. 
+ _cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first() + _is_caldav = bool(_cal and _cal.source == "caldav") + _cal_id, _ev_uid = ev.calendar_id, ev.uid db.delete(ev) db.commit() + if _is_caldav: + from src.caldav_writeback import writeback_event + await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True) return {"ok": True} except HTTPException: raise @@ -878,9 +1170,9 @@ def setup_calendar_routes() -> APIRouter: finally: db.close() - # 10 MB hard cap on ICS upload. Loading the whole file into memory is - # unavoidable with python-icalendar, so an unbounded upload would OOM. - _ICS_MAX_BYTES = 10 * 1024 * 1024 + # Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole + # file into memory is unavoidable with python-icalendar, so an unbounded + # upload would OOM. @router.post("/import") async def import_ics(request: Request, file: UploadFile = File(...), calendar_name: str = ""): @@ -890,9 +1182,7 @@ def setup_calendar_routes() -> APIRouter: owner = _require_user(request) db = SessionLocal() try: - content = await file.read() - if len(content) > _ICS_MAX_BYTES: - raise HTTPException(413, f"ICS file too large (max {_ICS_MAX_BYTES // (1024*1024)} MB)") + content = await read_upload_limited(file, ICS_MAX_BYTES, "ICS file") try: cal_data = iCal.from_ical(content) except Exception as e: @@ -938,7 +1228,12 @@ def setup_calendar_routes() -> APIRouter: source_uid = str(comp.get("uid", "")) or None if source_uid: src_dtstart = dtstart.dt - naive_src = src_dtstart.replace(tzinfo=None) if hasattr(src_dtstart, 'tzinfo') and src_dtstart.tzinfo else src_dtstart + # Normalize to the SAME naive form import_ics stores, so a + # re-import of a tz-aware event matches the existing row. + # The old code stripped tzinfo WITHOUT converting to UTC + # (wall clock), while storage converts to UTC first, so + # every re-import of a TZID event created a duplicate. 
+ naive_src = _ics_naive_dtstart(src_dtstart) existing = ( db.query(CalendarEvent) .filter( @@ -1032,34 +1327,37 @@ def setup_calendar_routes() -> APIRouter: "BEGIN:VCALENDAR", "VERSION:2.0", "PRODID:-//Odysseus//Calendar//EN", - f"X-WR-CALNAME:{cal.name}", + f"X-WR-CALNAME:{_ics_escape(cal.name)}", ] for ev in events: lines.append("BEGIN:VEVENT") lines.append(f"UID:{ev.uid}") - lines.append(f"SUMMARY:{ev.summary or ''}") + lines.append(f"SUMMARY:{_ics_escape(ev.summary or '')}") if ev.all_day: lines.append(f"DTSTART;VALUE=DATE:{ev.dtstart.strftime('%Y%m%d')}") lines.append(f"DTEND;VALUE=DATE:{ev.dtend.strftime('%Y%m%d')}") else: - lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}") - lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}") + _dt_suffix = "Z" if getattr(ev, "is_utc", False) else "" + lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}") + lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}") if ev.description: - desc = ev.description.replace(chr(10), '\\n') - lines.append(f"DESCRIPTION:{desc}") + lines.append(f"DESCRIPTION:{_ics_escape(ev.description)}") if ev.location: - lines.append(f"LOCATION:{ev.location}") + lines.append(f"LOCATION:{_ics_escape(ev.location)}") if ev.rrule: lines.append(f"RRULE:{ev.rrule}") lines.append("END:VEVENT") lines.append("END:VCALENDAR") ics_data = "\r\n".join(lines) - safe_name = cal.name.replace(" ", "_").replace("/", "_") + download_name = _safe_ics_filename(cal.name) return Response( content=ics_data, media_type="text/calendar", - headers={"Content-Disposition": f'attachment; filename="{safe_name}.ics"'}, + headers={ + "Content-Disposition": f'attachment; filename="{download_name}"', + "X-Content-Type-Options": "nosniff", + }, ) except HTTPException: raise @@ -1081,7 +1379,7 @@ def setup_calendar_routes() -> APIRouter: "tomorrow", "next Tuesday", "in 30 minutes" resolve correctly. Uses the "utility" endpoint (small / fast model) to keep latency low. 
""" - _require_user(request) + owner = _require_user(request) from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async from src.text_helpers import strip_think @@ -1092,23 +1390,36 @@ def setup_calendar_routes() -> APIRouter: text = (body.get("text") or "").strip() if not text: raise HTTPException(400, "text is required") - tz_hint = (body.get("tz") or "").strip() + from src.user_time import ( + clear_user_time_context, + current_datetime_prompt, + now_user_local, + set_user_tz_name, + set_user_tz_offset, + ) - url, model, headers = resolve_endpoint("utility") + clear_user_time_context() + tz_hint = (body.get("tz") or "").strip() + if body.get("tz_offset") is not None: + set_user_tz_offset(body.get("tz_offset")) + if tz_hint: + set_user_tz_name(tz_hint) + + url, model, headers = resolve_endpoint("utility", owner=owner or None) if not url: - url, model, headers = resolve_endpoint("default") + url, model, headers = resolve_endpoint("default", owner=owner or None) if not url or not model: return {"ok": False, "error": "No LLM endpoint configured"} - now = datetime.now() + now = now_user_local() now_iso = now.strftime("%Y-%m-%dT%H:%M:%S") # The model gets only the schema it needs to fill out; we re-validate # everything client-side too. system_prompt = ( - "You are a calendar event parser. Read the user's one-line " + current_datetime_prompt() + + "You are a calendar event parser. Read the user's one-line " "description and emit STRICT JSON describing the event. " - f"Today is {now.strftime('%A, %Y-%m-%d')} ({now_iso}). " - + (f"User timezone: {tz_hint}. " if tz_hint else "") + f"The current user-local timestamp is {now_iso}. " + "Resolve relative dates (\"tomorrow\", \"friday\", \"next monday\", " "\"in 30 minutes\") against today. Default duration is 60 minutes " "when no end time is given. 
If the text mentions a date with no " diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index 7e7a76432..0b1c5d8ba 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -3,6 +3,7 @@ import asyncio import json import logging +import os import re from dataclasses import dataclass, field from typing import Any, Optional @@ -11,6 +12,7 @@ from core.models import ChatMessage from core.database import SessionLocal from core.database import Session as DBSession, ModelEndpoint from src.llm_core import normalize_model_id +from src.endpoint_resolver import normalize_base from src.context_compactor import maybe_compact, trim_for_context from src.auth_helpers import get_current_user from src.prompt_security import untrusted_context_message @@ -73,7 +75,7 @@ def _enforce_chat_privileges(request, sess) -> None: allowlist, or HTTPException(429) if the user has hit their daily message cap. No-op for unauthenticated callers or when auth_manager is absent (single-user mode). Admins receive ADMIN_PRIVILEGES from get_privileges, - which means empty allowed_models / zero cap → no-op for them. + which means unrestricted allowed_models / zero cap -> no-op for them. """ try: user = get_current_user(request) @@ -86,8 +88,18 @@ def _enforce_chat_privileges(request, sess) -> None: return privs = auth_manager.get_privileges(user) or {} - allowed = privs.get("allowed_models") or [] - if allowed and sess.model and sess.model not in allowed: + + # Explicit "block everything" sentinel takes precedence over the + # allowlist — it's the only way to distinguish "user clicked [None]" + # (block all) from "user clicked [All]" (no restriction), since both + # otherwise produce an empty `allowed_models` list. 
+ if privs.get("block_all_models"): + raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.") + + allowed_raw = privs.get("allowed_models") + allowed = allowed_raw if isinstance(allowed_raw, list) else [] + restricted = bool(privs.get("allowed_models_restricted")) or bool(allowed) + if restricted and sess.model and sess.model not in allowed: raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.") cap = int(privs.get("max_messages_per_day") or 0) @@ -119,7 +131,7 @@ def needs_auto_name(name: str) -> bool: if name.startswith("Chat:") or name == "Chat": return True # Default frontend name: "modelname HH:MM:SS AM/PM" - if re.match(r'^.+ \d{1,2}:\d{2}:\d{2}\s*(AM|PM)$', name): + if re.match(r"^.+ \d{1,2}:\d{2}:\d{2}(\s*(AM|PM))?$", name, re.IGNORECASE): return True return False @@ -146,9 +158,13 @@ async def auto_name_session(session_manager, sess): if not first_msg: return + owner = getattr(sess, "owner", None) t_url, t_model, t_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) + if not t_model: + logger.debug("[auto-name] No model provided, skipping") + return # max_tokens big enough that reasoning models (Minimax M2, # DeepSeek R1, QwQ, etc.) have headroom for @@ -188,14 +204,26 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: Returns {"model": ..., "endpoint_url": ..., "endpoint_name": ...} or None. 
""" import requests as _req - from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base + from src.endpoint_resolver import ( + build_chat_url, + build_headers, + build_models_url, + normalize_base, + resolve_endpoint_runtime, + ) + from src.chatgpt_subscription import is_chatgpt_subscription_base current_url = sess.endpoint_url or "" + owner = getattr(sess, "owner", None) db = SessionLocal() try: - endpoints = db.query(ModelEndpoint).filter( + q = db.query(ModelEndpoint).filter( ModelEndpoint.is_enabled == True - ).all() + ) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() finally: db.close() @@ -204,26 +232,33 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: # Skip current endpoint if current_url and base in current_url: continue - # Quick ping - ping_url = build_models_url(base) - headers = build_headers(ep.api_key, base) try: - r = _req.get(ping_url, headers=headers, timeout=5) - r.raise_for_status() - data = r.json() - models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] - if not models: - models = [ - m.get("name") or m.get("model") - for m in (data.get("models") or []) - if m.get("name") or m.get("model") - ] + base, api_key = resolve_endpoint_runtime(ep, owner=owner) + except Exception: + continue + ping_url = build_models_url(base) + headers = build_headers(api_key, base) + try: + if ping_url: + r = _req.get(ping_url, headers=headers, timeout=5) + r.raise_for_status() + data = r.json() + models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not models: + models = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] + else: + models = json.loads(ep.cached_models or "[]") if not models: continue # Found a working endpoint — update session new_model = models[0] chat_url = build_chat_url(base) - new_headers = 
build_headers(ep.api_key, base) + new_headers = build_headers(api_key, base) + persisted_headers = {} if is_chatgpt_subscription_base(base) else new_headers sess.model = new_model sess.endpoint_url = chat_url @@ -235,7 +270,7 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: _db.query(DBSession).filter(DBSession.id == session_id).update({ "model": new_model, "endpoint_url": chat_url, - "headers": json.dumps(new_headers), + "headers": persisted_headers, }) _db.commit() finally: @@ -269,11 +304,16 @@ def extract_preset(chat_handler, preset_id) -> PresetInfo: async def preprocess( chat_handler, message, att_ids, sess, auto_opened_docs: Optional[list] = None, + allow_tool_preprocessing: bool = True, ) -> PreprocessedMessage: """Run chat_handler.preprocess_message and wrap the result.""" enhanced, user_content, text_ctx, yt_transcripts, att_meta = ( await chat_handler.preprocess_message( - message, att_ids, sess, auto_opened_docs=auto_opened_docs + message, + att_ids, + sess, + auto_opened_docs=auto_opened_docs, + allow_tool_preprocessing=allow_tool_preprocessing, ) ) return PreprocessedMessage( @@ -306,34 +346,157 @@ def fire_message_event(request, webhook_manager, session_id: str, sess, message: fire_event("message_sent", user) -def resolve_session_auth(sess, session_id: str): - """Ensure session has auth headers — resolve from endpoint DB if missing.""" - has_auth = sess.headers and isinstance(sess.headers, dict) and any( - k.lower() in ('authorization', 'x-api-key') for k in sess.headers +def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: + if not session_url or not endpoint_base: + return False + try: + from src.endpoint_resolver import build_chat_url, normalize_base + + sess_url = session_url.rstrip("/") + base = normalize_base(endpoint_base).rstrip("/") + return sess_url in { + base, + base + "/chat/completions", + build_chat_url(base).rstrip("/"), + } + except Exception: + return False + + +def 
_has_auth_keys(headers) -> bool: + """True if a headers dict carries an Authorization/x-api-key entry.""" + return isinstance(headers, dict) and any( + k.lower() in ('authorization', 'x-api-key') for k in headers ) - if has_auth: + + +def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None): + """Ensure session has auth headers — resolve from endpoint DB if missing.""" + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(sess, "endpoint_url", "") or "") + except Exception: + is_chatgpt_subscription = False + has_auth = _has_auth_keys(sess.headers) + if has_auth and not is_chatgpt_subscription: return try: - from src.endpoint_resolver import build_headers + from src.endpoint_resolver import build_headers, resolve_endpoint_runtime db = SessionLocal() try: - domain = sess.endpoint_url.split("//")[1].split("/")[0] if "//" in sess.endpoint_url else "" - if domain: - ep = db.query(ModelEndpoint).filter(ModelEndpoint.base_url.contains(domain)).first() - if ep and ep.api_key: - sess.headers = build_headers(ep.api_key, ep.base_url) - db.query(DBSession).filter(DBSession.id == session_id).update( - {"headers": json.dumps(sess.headers)} - ) - db.commit() - logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}") + target_url = getattr(sess, "endpoint_url", "") or "" + if not target_url: + return + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + # Missing headers usually means "recover from the saved endpoint". + # Scope that lookup to the session owner, otherwise two users + # with similar endpoint URLs can borrow each other's API key. 
+ from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + for ep in q.all(): + if not _session_url_matches_endpoint(target_url, ep.base_url or ""): + continue + try: + base, api_key = resolve_endpoint_runtime(ep, owner=owner) + except Exception as e: + logger.warning("Failed to resolve provider auth for session %s: %s", session_id, e) + return + if not api_key: + # No usable key (e.g. ChatGPT Subscription needs re-auth). + return + sess.headers = build_headers(api_key, base) + if is_chatgpt_subscription: + # The bearer is short-lived and re-resolved per request, so it + # stays request-local and is never written to the plaintext + # sessions.headers column. Proactively strip any bearer an + # older code path may have persisted so it does not linger. + stale_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + stale_q = stale_q.filter(DBSession.owner == owner) + stored = stale_q.first() + if stored is not None and _has_auth_keys(stored.headers): + stale_q.update({"headers": {}}) + db.commit() + logger.info(f"Cleared persisted ChatGPT Subscription bearer from session {session_id}") + logger.debug(f"Resolved request-local ChatGPT Subscription auth for session {session_id}") + return + update_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + update_q = update_q.filter(DBSession.owner == owner) + update_q.update({"headers": sess.headers}) + db.commit() + logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}") + return finally: db.close() except Exception as e: logger.warning(f"Failed to resolve session headers: {e}") +def _match_cached_model_id(requested: str, models) -> Optional[str]: + if not requested or not models: + return None + model_ids = [str(m) for m in models if m] + if requested in model_ids: + return requested + + req_base = os.path.basename(requested.rstrip("/")) + for model_id in model_ids: + if os.path.basename(model_id.rstrip("/")) == 
req_base: + return model_id + return None + + +def _normalize_model_id_from_cache(sess) -> Optional[str]: + """Use stored endpoint model IDs before falling back to a live /models probe.""" + endpoint_url = getattr(sess, "endpoint_url", "") or "" + requested = getattr(sess, "model", "") or "" + if not endpoint_url or not requested: + return None + + try: + session_base = normalize_base(endpoint_url) + except Exception: + session_base = endpoint_url.rstrip("/") + if not session_base: + return None + + db = SessionLocal() + try: + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + owner = getattr(sess, "owner", None) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() + for ep in endpoints: + try: + if normalize_base(getattr(ep, "base_url", "") or "") != session_base: + continue + except Exception: + continue + + raw_models = getattr(ep, "cached_models", None) + if not raw_models: + continue + try: + models = json.loads(raw_models) if isinstance(raw_models, str) else raw_models + except Exception: + continue + + matched = _match_cached_model_id(requested, models) + if matched: + return matched + except Exception as e: + logger.debug("Cached model normalization skipped: %s", e) + finally: + db.close() + + return None + + async def build_chat_context( sess, request, @@ -354,6 +517,7 @@ async def build_chat_context( webhook_manager=None, use_enhanced_message: bool = False, agent_mode: bool = False, + allow_tool_preprocessing: bool = True, ) -> ChatContext: """Build the full context (preface + messages) for an LLM call. 
@@ -371,6 +535,7 @@ async def build_chat_context( preprocessed = await preprocess( chat_handler, message, att_ids or [], sess, auto_opened_docs=auto_opened_docs, + allow_tool_preprocessing=allow_tool_preprocessing, ) # Add user message to history @@ -389,6 +554,9 @@ async def build_chat_context( # Skills injection respects its own enable toggle (mirrors memory_enabled). # When off, the "Available skills" index is not added to the prompt. skills_enabled = not incognito and uprefs.get("skills_enabled", True) + if not allow_tool_preprocessing: + mem_enabled = False + skills_enabled = False logger.debug( "Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)", mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"), @@ -396,11 +564,11 @@ async def build_chat_context( # Use RAG? use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True - if incognito: + if incognito or not allow_tool_preprocessing: use_rag_val = False # If pre-fetched search context was provided (compare mode), skip live web search - skip_web = bool(search_context) + skip_web = bool(search_context) or not allow_tool_preprocessing # Build context preface # The stream path uses enhanced_message (with CoT/preprocessing applied), @@ -427,15 +595,20 @@ async def build_chat_context( used_memories = getattr(chat_processor, '_last_used_memories', []) # Inject pre-fetched search context (compare mode) - if search_context: + if search_context and allow_tool_preprocessing: preface.append(untrusted_context_message("prefetched search context", search_context)) # YouTube transcripts for transcript in preprocessed.youtube_transcripts: preface.append(untrusted_context_message("youtube transcript", transcript)) - # Normalize model ID - norm = normalize_model_id(sess.endpoint_url, sess.model) + # Normalize model ID. Prefer cached endpoint models so group chat does not + # re-hit slow local /models endpoints on every participant turn. 
+ norm = _normalize_model_id_from_cache(sess) or normalize_model_id( + sess.endpoint_url, + sess.model, + owner=getattr(sess, "owner", None), + ) if norm: sess.model = norm @@ -444,7 +617,7 @@ async def build_chat_context( # Auto-compact messages, context_length, was_compacted = await maybe_compact( - sess, sess.endpoint_url, sess.model, messages, sess.headers, + sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user, ) messages = trim_for_context(messages, context_length) @@ -494,6 +667,8 @@ def _normalize_thinking(text: str) -> str: import re if not text: return text + from src.text_helpers import normalize_thinking_markup + text = normalize_thinking_markup(text) reasoning_prefix_re = re.compile( r'^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )', re.IGNORECASE, @@ -604,6 +779,10 @@ def _extract_thinking_meta(text: str) -> dict | None: import re if not text: return None + from src.text_helpers import normalize_thinking_markup + original_text = text + text = normalize_thinking_markup(text) + normalized_changed = text != original_text # Check for tags (native or injected) time_match = re.search(r' dict | None: if thinking and reply: return {"thinking": thinking, "reply": reply, "time": think_time} + if normalized_changed and text.strip() and text.strip() != original_text.strip(): + return {"thinking": "", "reply": text.strip(), "time": think_time} + return None @@ -642,7 +824,8 @@ def clean_thinking_for_save(content: str, metadata: dict | None = None) -> tuple md = dict(metadata) if metadata else {} info = _extract_thinking_meta(content) if info: - md["thinking"] = info["thinking"] + if info.get("thinking"): + md["thinking"] = info["thinking"] if info.get("time"): md["thinking_time"] = info["time"] return info["reply"], md @@ -667,7 +850,19 @@ def save_assistant_response( ): """Add assistant response to session history. 
In incognito mode, keeps in-memory context but skips DB persistence.""" md = dict(last_metrics) if last_metrics else {} - md["model"] = sess.model + def _model_value(value) -> str: + if value is None: + return "" + if not isinstance(value, str): + value = str(value) + return value.strip() + + requested_model = _model_value(md.get("requested_model") or md.get("selected_model") or getattr(sess, "model", "")) + actual_model = _model_value(md.get("model") or md.get("actual_model") or requested_model) + if requested_model: + md["requested_model"] = requested_model + if actual_model: + md["model"] = actual_model if character_name: md["character_name"] = character_name if web_sources: @@ -686,8 +881,10 @@ def save_assistant_response( # Extract thinking into metadata (don't pollute message content with tags) _think_info = _extract_thinking_meta(full_response) if _think_info: - md["thinking"] = _think_info["thinking"] - md["thinking_time"] = _think_info.get("time") + if _think_info.get("thinking"): + md["thinking"] = _think_info["thinking"] + if _think_info.get("time"): + md["thinking_time"] = _think_info.get("time") _content = _think_info["reply"] else: _content = full_response @@ -734,16 +931,17 @@ def run_post_response_tasks( skills_manager=None, owner: str = None, extract_skills: bool = True, + allow_background_extraction: bool = True, ): """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.""" # Memory extraction — only every 4th message pair to avoid excess LLM calls _msg_count = len(sess.history) if hasattr(sess, 'history') else 0 _should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0) - if not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True): + if allow_background_extraction and not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True): from services.memory.memory_extractor import extract_and_store from src.task_endpoint import 
resolve_task_endpoint t_url, t_model, t_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) asyncio.create_task(extract_and_store( sess, memory_manager, memory_vector, @@ -766,6 +964,7 @@ def run_post_response_tasks( ) if ( extract_skills + and allow_background_extraction and auto_skills_enabled and not incognito and not compare_mode @@ -780,7 +979,7 @@ def run_post_response_tasks( from services.memory.skill_extractor import maybe_extract_skill from src.task_endpoint import resolve_task_endpoint s_url, s_model, s_headers = resolve_task_endpoint( - sess.endpoint_url, sess.model, sess.headers, + sess.endpoint_url, sess.model, sess.headers, owner=owner, ) logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model) asyncio.create_task(maybe_extract_skill( diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 3cdcb8586..a718d3fbe 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -2,6 +2,7 @@ import asyncio import json +import os import time import logging from datetime import datetime @@ -19,14 +20,17 @@ from src import agent_runs from src.model_context import estimate_tokens from src.chat_helpers import coerce_message_and_session from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url +from src.session_search import search_session_messages from src.prompt_security import untrusted_context_message from core.exceptions import SessionNotFoundError from src.auth_helpers import get_current_user from routes.session_routes import _verify_session_owner +from routes.document_helpers import _owner_session_filter from core.database import SessionLocal, get_session_mode, set_session_mode from core.database import Session as DBSession, ChatMessage as DBChatMessage from core.database import Document as DBDocument, ModelEndpoint from routes.research_routes import _resolve_research_endpoint +from routes.model_routes import 
_visible_models from routes.chat_helpers import ( resolve_session_auth, build_chat_context, @@ -35,12 +39,14 @@ from routes.chat_helpers import ( clean_thinking_for_save, _enforce_chat_privileges, ) -from src.action_intents import message_needs_tools as _message_needs_tools +from src.action_intents import classify_tool_intent as _classify_tool_intent +from src.tool_policy import build_effective_tool_policy logger = logging.getLogger(__name__) # Track active streams for partial-save safety net _active_streams: Dict[str, dict] = {} +_IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image") def _stream_set(session_id: str, **fields) -> None: @@ -69,13 +75,17 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: return sess in variants or sess.startswith(base + "/") -def _clear_orphaned_session_endpoint(sess) -> bool: +def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool: """Clear a session model if its endpoint was deleted from ModelEndpoint.""" if not getattr(sess, "endpoint_url", ""): return False db = SessionLocal() try: - endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all() + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() for ep in endpoints: if _session_url_matches_endpoint(sess.endpoint_url or "", ep.base_url or ""): return False @@ -96,6 +106,197 @@ def _clear_orphaned_session_endpoint(sess) -> bool: db.close() +def _endpoint_cache_contains_model(endpoint, model: str) -> bool: + """Return True when a populated endpoint model cache includes ``model``. + + Empty/malformed caches are treated as unknown rather than a negative match + so older image endpoints without cached models still work. 
+ """ + raw = getattr(endpoint, "cached_models", None) + if not raw: + return True + try: + models = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return True + if not isinstance(models, list) or not models: + return True + wanted = (model or "").strip() + return wanted in {str(item).strip() for item in models} + + +def _is_image_generation_session(sess, owner: str | None = None) -> bool: + """Whether this chat session should bypass text chat and generate images. + + Model-name prefixes are explicit image models. Endpoint type is only used + when the current session endpoint actually matches that image endpoint, and + when a populated endpoint model cache includes the selected model. This + prevents an image endpoint on the same host from misrouting ordinary text + models into the image-generation path. + """ + model = (getattr(sess, "model", "") or "").strip() + if any(model.lower().startswith(prefix) for prefix in _IMAGE_MODEL_PREFIXES): + return True + + endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip() + if not endpoint_url: + return False + + db = SessionLocal() + try: + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() + for endpoint in endpoints: + if (getattr(endpoint, "model_type", None) or "llm") != "image": + continue + if not _session_url_matches_endpoint(endpoint_url, getattr(endpoint, "base_url", "") or ""): + continue + if _endpoint_cache_contains_model(endpoint, model): + return True + except Exception: + return False + finally: + db.close() + return False + + +def _recover_empty_session_model(sess, session_id: str, owner: str | None = None) -> bool: + """Re-populate sess.model from the matching endpoint's cached models. 
+ + Covers the window between endpoint setup and the first chat send: the + picker showed a model in the dropdown but the session record never got + written (Issue #587 — UI uses the cached endpoint list, not s.model). + For ChatGPT Subscription, also repairs stale OpenAI API model names such as + ``gpt-5`` that are not accepted by the Codex-backed ChatGPT account route. + """ + current_model = (getattr(sess, "model", "") or "").strip() + endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip() + is_chatgpt_subscription = False + if current_model: + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(endpoint_url) + if not is_chatgpt_subscription: + return False + except Exception: + return False + db = SessionLocal() + try: + # Prefer the endpoint whose base URL matches the session — we know the + # user already pointed this session at that endpoint, so its first + # cached model is the most defensible default. 
+ ep = None + if getattr(sess, "endpoint_url", ""): + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() + for cand in endpoints: + if _session_url_matches_endpoint(sess.endpoint_url or "", cand.base_url or ""): + ep = cand + break + if not ep: + return False + if not is_chatgpt_subscription: + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(ep, "base_url", "") or endpoint_url) + except Exception: + is_chatgpt_subscription = False + try: + cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or []) + except Exception: + cached = [] + if not cached: + visible = [] + else: + try: + visible = _visible_models(cached, getattr(ep, "hidden_models", None)) + except Exception: + visible = cached + if current_model and current_model in {str(item).strip() for item in visible}: + return False + if is_chatgpt_subscription: + live_models = [] + if getattr(ep, "provider_auth_id", None): + try: + from src.chatgpt_subscription import fetch_available_models + from src.endpoint_resolver import resolve_endpoint_runtime + _base, api_key = resolve_endpoint_runtime(ep, owner=owner) + if api_key: + live_models = fetch_available_models(api_key) + if live_models: + ep.cached_models = json.dumps(live_models) + db.commit() + except Exception: + live_models = [] + # ChatGPT Subscription recovery must use the live Codex catalog. + # Cached rows are only trusted above to avoid revalidating a model + # that is already present in the visible picker list. 
+ cached = live_models + if not cached: + return False + try: + visible = _visible_models(cached, getattr(ep, "hidden_models", None)) + except Exception: + visible = cached + if current_model and current_model in {str(item).strip() for item in visible}: + return False + if not visible: + return False + model = visible[0] + if not isinstance(model, str) or not model.strip(): + return False + model = model.strip() + # Persist so the next request, websocket reconnect, or page reload + # picks up the same model (we'd otherwise re-pick on every send + # and silently switch on the user if the cached order shifts). + db_session_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + db_session_q = db_session_q.filter(DBSession.owner == owner) + db_session = db_session_q.first() + if db_session: + db_session.model = model + db_session.updated_at = datetime.utcnow() + db.commit() + sess.model = model + logger.info( + "Recovered session model for %s — picked %r from endpoint %s", + session_id, model, ep.id, + ) + return True + except Exception as e: + db.rollback() + logger.warning("Failed to recover empty session model for %s: %s", session_id, e) + return False + finally: + db.close() + + +def _set_user_time_from_request(request: Request) -> None: + """Copy browser timezone headers into the per-request context. + + This is intentionally ephemeral: it is used only while building prompts + and running tools for this request. It is not persisted or logged. 
+ """ + try: + tz_offset = request.headers.get("x-tz-offset") + tz_name = request.headers.get("x-tz-name") + from src.user_time import clear_user_time_context, set_user_tz_name, set_user_tz_offset + + clear_user_time_context() + if tz_offset is not None: + set_user_tz_offset(tz_offset) + if tz_name: + set_user_tz_name(tz_name) + except Exception: + pass + + def setup_chat_routes( session_manager, chat_handler, @@ -114,6 +315,8 @@ def setup_chat_routes( # ------------------------------------------------------------------ # @router.post("/api/chat", response_model=Dict[str, str]) async def chat_endpoint(request: Request, chat_request: ChatRequest) -> Dict[str, str]: + _set_user_time_from_request(request) + message = chat_request.message session = chat_request.session att_ids = chat_request.attachments or [] @@ -130,15 +333,31 @@ def setup_chat_routes( sess = session_manager.get_session(session) except KeyError: raise HTTPException(404, f"Session '{session}' not found") - if _clear_orphaned_session_endpoint(sess): + owner = get_current_user(request) + if _clear_orphaned_session_endpoint(sess, owner=owner): raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.") + # Empty model + live endpoint = setup race (Issue #587). Repair from + # the endpoint's cached model list before privilege checks, which + # otherwise see "" and behave inconsistently with the allowlist. + _recover_empty_session_model(sess, session, owner=owner) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. Open the model picker and choose one before sending.", + ) + # Same allowed_models + daily-cap gate as chat_stream (mirror so the # non-streaming path can't be used to bypass). 
_enforce_chat_privileges(request, sess) + tool_policy = build_effective_tool_policy(last_user_message=message) + allow_tool_preprocessing = not tool_policy.block_all_tool_calls + # Inline memory command - memory_response = await chat_handler.handle_memory_command(sess, message) + memory_response = None + if not tool_policy.blocks("manage_memory"): + memory_response = await chat_handler.handle_memory_command(sess, message) if memory_response: return {"response": memory_response} @@ -152,10 +371,15 @@ def setup_chat_routes( use_web=use_web, time_filter=time_filter, webhook_manager=webhook_manager, + allow_tool_preprocessing=allow_tool_preprocessing, ) # Research injection - if use_research: + research_blocked_by_policy = ( + tool_policy.blocks("trigger_research") + or tool_policy.blocks("manage_research") + ) + if use_research and not research_blocked_by_policy: try: _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess) research_ctx = await research_handler.call_research_service( @@ -190,6 +414,7 @@ def setup_chat_routes( ctx.uprefs, memory_manager, memory_vector, webhook_manager, character_name=ctx.preset.character_name, owner=ctx.user, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) return {"response": reply} @@ -211,16 +436,7 @@ def setup_chat_routes( except Exception as e: raise HTTPException(400, f"Request parsing error: {e}") - # Stash the user's UTC offset (in minutes east of UTC) from the - # frontend so tools like manage_notes interpret natural-language - # times in the USER's tz, not the server's. See calendar_routes. 
- try: - _tz_hdr = request.headers.get("x-tz-offset") - if _tz_hdr is not None: - from routes.calendar_routes import set_user_tz_offset - set_user_tz_offset(_tz_hdr) - except Exception: - pass + _set_user_time_from_request(request) form_data = await request.form() message = form_data.get("message") @@ -236,7 +452,25 @@ def setup_chat_routes( search_context = form_data.get("search_context") # pre-fetched web search results (compare mode) compare_mode = str(form_data.get("compare_mode", "")).lower() == "true" incognito = str(form_data.get("incognito", "")).lower() == "true" + plan_mode = str(form_data.get("plan_mode", "")).lower() == "true" chat_mode = str(form_data.get("mode", "")).lower() # 'chat' or 'agent' + # Workspace: confine the agent's file/shell tools to this folder. Validate + # it's a real directory; ignore (no confinement) otherwise. + workspace = (form_data.get("workspace") or "").strip() + if workspace: + _ws_real = os.path.realpath(os.path.expanduser(workspace)) + workspace = _ws_real if os.path.isdir(_ws_real) else "" + # Plan mode is a modifier on agent mode — it only makes sense with tools. + if plan_mode: + chat_mode = "agent" + # An approved plan being EXECUTED: the frontend sends the checklist back + # on each turn so we can pin it in context. This way a long plan on a + # weak model survives history truncation — the agent can always re-read + # the plan. Ignored while still proposing (plan_mode on). Capped so a + # huge plan can't blow the prompt. + approved_plan = "" + if not plan_mode: + approved_plan = (form_data.get("approved_plan") or "").strip()[:8192] # Did the USER explicitly pick agent mode? (vs. us auto-escalating # below). Skill extraction should only learn from real agent sessions, # not chats we quietly promoted for a notes/calendar intent. @@ -249,10 +483,15 @@ def setup_chat_routes( # its way through a plain chat request (and fail, especially with the # shell disabled). 
auto_escalated = False - if chat_mode == "chat" and isinstance(message, str) and _message_needs_tools(message): + _tool_intent = _classify_tool_intent(message) if isinstance(message, str) else None + if chat_mode == "chat" and _tool_intent and _tool_intent.needs_tools: chat_mode = "agent" auto_escalated = True - logger.info("chat→agent auto-escalation: message matched tool-intent pattern") + logger.info( + "chat→agent auto-escalation: category=%s reason=%s", + _tool_intent.category, + _tool_intent.reason, + ) active_doc_id = form_data.get("active_doc_id", "").strip() logger.info(f"[doc-inject] chat_mode={chat_mode}, active_doc_id={active_doc_id!r}") @@ -270,8 +509,21 @@ def setup_chat_routes( # but BEFORE loading. Prevents cross-user session hijack. _verify_session_owner(request, session) sess = session_manager.get_session(session) - if _clear_orphaned_session_endpoint(sess): + owner = get_current_user(request) + if _clear_orphaned_session_endpoint(sess, owner=owner): raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.") + # Issue #587: picker shows a model from the endpoint cache but + # s.model never made it onto the DB row (first-send race after + # endpoint setup, or a previous endpoint delete/recreate). Pull + # the first cached model off the matching endpoint so the + # upstream isn't called with model="" (which surfaces as a + # generic 401/503). + _recover_empty_session_model(sess, session, owner=owner) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. 
Open the model picker and choose one before sending.", + ) except SessionNotFoundError as e: raise HTTPException(404, str(e)) except (ValueError, ValidationError): @@ -288,7 +540,7 @@ def setup_chat_routes( _enforce_chat_privileges(request, sess) # Ensure session has auth headers - resolve_session_auth(sess, session) + resolve_session_auth(sess, session, owner=get_current_user(request)) # Check for research_pending BEFORE mode persist overwrites it do_research = str(use_research).lower() == "true" @@ -297,11 +549,6 @@ def setup_chat_routes( do_research = True logger.info(f"Session {session} in research_pending — auto-triggering research") - # Persist session mode (research > agent > chat) - _effective_mode = 'research' if do_research else (chat_mode or 'chat') - if _effective_mode in ('agent', 'research', 'chat'): - set_session_mode(session, _effective_mode) - att_ids = [] if body and isinstance(body.get("attachments"), list): att_ids = [str(x) for x in body["attachments"]] @@ -312,6 +559,10 @@ def setup_chat_routes( pass no_memory = str(form_data.get("no_memory", "")).lower() == "true" + pre_context_tool_policy = build_effective_tool_policy( + last_user_message=message, + ) + allow_tool_preprocessing = not pre_context_tool_policy.block_all_tool_calls # Build shared context (stream path uses enhanced_message for context preface) ctx = await build_chat_context( @@ -333,6 +584,7 @@ def setup_chat_routes( # manage_skills (agent mode). In plain chat or incognito the # index would be useless / unwanted noise. agent_mode=(chat_mode == "agent"), + allow_tool_preprocessing=allow_tool_preprocessing, ) _research_flags = {"do": do_research} # Mutable container for generator scope @@ -343,18 +595,39 @@ def setup_chat_routes( try: if active_doc_id: logger.info(f"[doc-inject] active_doc_id from frontend: {active_doc_id}") - active_doc = _doc_db.query(DBDocument).filter( - DBDocument.id == active_doc_id, - ).first() + # Scope to the caller's documents. 
The session and in-memory + # fallbacks below are already owner/session-bound; this + # explicit-id path looked up by id alone, so a user could + # inject another user's document by passing its id. + _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id) + active_doc = _owner_session_filter(_doc_q, ctx.user).first() if active_doc: - logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") + doc_session = active_doc.session_id + doc_owner = getattr(active_doc, "owner", None) + if doc_owner and ctx.user and doc_owner != ctx.user: + logger.warning( + "[doc-inject] ignoring active_doc_id %s owned by another user", + active_doc_id, + ) + active_doc = None + elif doc_session and doc_session != session: + logger.warning( + "[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s", + active_doc_id, + doc_session, + session, + ) + active_doc = None + else: + logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") else: logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}") if not active_doc: - active_doc = _doc_db.query(DBDocument).filter( + _session_doc_q = _doc_db.query(DBDocument).filter( DBDocument.session_id == session, DBDocument.is_active == True - ).order_by(DBDocument.updated_at.desc()).first() + ) + active_doc = _owner_session_filter(_session_doc_q, ctx.user).order_by(DBDocument.updated_at.desc()).first() if active_doc: logger.info(f"[doc-inject] found by session fallback: title={active_doc.title!r}") # Last resort: the document the agent itself just created/edited @@ -368,7 +641,8 @@ def setup_chat_routes( from src.tool_implementations import get_active_document _mem_id = get_active_document() if _mem_id: - cand = 
_doc_db.query(DBDocument).filter(DBDocument.id == _mem_id).first() + _mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id) + cand = _owner_session_filter(_mem_q, ctx.user).first() if cand and (not cand.session_id or cand.session_id == session): active_doc = cand logger.info(f"[doc-inject] found by in-memory active id: title={active_doc.title!r} (session_id={cand.session_id!r})") @@ -455,6 +729,32 @@ def setup_chat_routes( if chat_mode == 'chat': disabled_tools.update({"bash", "python", "read_file", "write_file", "web_search", "web_fetch", "search_chats", "manage_tasks"}) + # Plan mode: investigate read-only, propose a plan, don't mutate. Block + # every tool not on the read-only allowlist. (stream_agent_loop enforces + # this again + drops MCP, so this is belt-and-suspenders.) + if plan_mode: + from src.tool_security import plan_mode_disabled_tools + disabled_tools.update(plan_mode_disabled_tools()) + + tool_policy = build_effective_tool_policy( + disabled_tools=disabled_tools, + last_user_message=message, + ) + disabled_tools = tool_policy.all_disabled_names() + research_blocked_by_policy = bool( + tool_policy.blocks("trigger_research") + or tool_policy.blocks("manage_research") + ) + effective_do_research = bool( + do_research and _research_flags["do"] and not research_blocked_by_policy + ) + + # Persist session mode after policy/privilege gates so blocked research + # turns remain ordinary chat/agent streams and saved messages. + _effective_mode = 'research' if effective_do_research else (chat_mode or 'chat') + if _effective_mode in ('agent', 'research', 'chat'): + set_session_mode(session, _effective_mode) + async def stream_with_save() -> AsyncGenerator[str, None]: # _effective_mode is read-only here; closure captures it from # the outer scope. (Was `nonlocal` but never reassigned.) 
@@ -462,7 +762,7 @@ def setup_chat_routes( web_sources = ctx.web_sources # Register active stream for partial-save safety net - _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": do_research, "mode": _effective_mode} + _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode} if ctx.preprocessed.attachment_meta: yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n" @@ -486,7 +786,7 @@ def setup_chat_routes( yield f"data: {json.dumps({'type': 'memories_used', 'data': ctx.used_memories})}\n\n" # Run research as a background task (survives page refresh) - if do_research and _research_flags["do"]: + if effective_do_research: _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess) _auth_keys = list(_r_headers.keys()) if _r_headers else [] logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={_r_ep}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}") @@ -563,6 +863,7 @@ def setup_chat_routes( prior_findings=_prior_findings, prior_urls=_prior_urls, on_complete=_on_research_done, + owner=_user, ) _heartbeat_counter = 0 @@ -619,12 +920,12 @@ def setup_chat_routes( # output. Resolved once per request. 
try: from src.endpoint_resolver import resolve_chat_fallback_candidates - _fallback_candidates = resolve_chat_fallback_candidates() + _fallback_candidates = resolve_chat_fallback_candidates(owner=_user) except Exception: _fallback_candidates = [] # Send model name early so the frontend can show it during streaming - _model_suffix = "Research" if do_research else None + _model_suffix = "Research" if effective_do_research else None _model_info = {"type": "model_info", "model": sess.model} if _model_suffix: _model_info["suffix"] = _model_suffix @@ -632,29 +933,14 @@ def setup_chat_routes( _model_info["character_name"] = ctx.preset.character_name yield f'data: {json.dumps(_model_info)}\n\n' - # Detect image models and route directly to image generation - _IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image") - _is_image_model = any(sess.model.lower().startswith(p) for p in _IMAGE_MODEL_PREFIXES) - - # Also check if the endpoint is registered as an image-type endpoint - if not _is_image_model: - try: - from src.endpoint_resolver import normalize_base as _nb - _ep_base = _nb(sess.endpoint_url) - _db = SessionLocal() - try: - _is_image_model = _db.query(ModelEndpoint).filter( - ModelEndpoint.model_type == "image", - ModelEndpoint.is_enabled == True, - ModelEndpoint.base_url.contains(_ep_base.split("://")[-1].split("/")[0]), - ).first() is not None - finally: - _db.close() - except Exception: - pass - - if _is_image_model: + if _is_image_generation_session(sess, owner=_user): from src.settings import get_setting + if tool_policy.blocks("generate_image"): + _blocked_msg = tool_policy.reason_for("generate_image") + yield f'data: {json.dumps({"delta": _blocked_msg})}\n\n' + yield "data: [DONE]\n\n" + _active_streams.pop(session, None) + return if not get_setting("image_gen_enabled", True): yield f'data: {json.dumps({"delta": "Image generation is disabled by the administrator."})}\n\n' yield "data: [DONE]\n\n" @@ -664,7 +950,7 @@ def setup_chat_routes( _user_msg = 
message or "" yield f'data: {json.dumps({"type": "tool_start", "tool": "generate_image", "command": _user_msg[:100]})}\n\n' yield ": heartbeat\n\n" - _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session) + _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session, owner=_user) _img_output = _img_result.get("results", _img_result.get("error", "")) _img_tool_data = {"type": "tool_output", "tool": "generate_image", "command": _user_msg[:100], "output": _img_output, "exit_code": 0 if "error" not in _img_result else 1} for _k in ("image_url", "image_id", "image_prompt", "image_model", "image_size", "image_quality"): @@ -688,6 +974,9 @@ def setup_chat_routes( return elif chat_mode == "chat": _chat_start = time.time() + _answered_by = None # set if the selected model failed and a fallback answered + _requested_model = sess.model + _actual_model = None # ── Chat mode: call stream_llm directly, NO tools, NO document access ── try: _chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates @@ -708,16 +997,43 @@ def setup_chat_routes( try: data = json.loads(chunk[6:]) if "delta" in data: - full_response += data["delta"] - _stream_set(session, partial=full_response) + # Reasoning tokens arrive flagged thinking:true. + # Forward them so the client can show a thinking + # indicator, but don't fold them into the saved + # reply (mirrors the rewrite path below). + if not data.get("thinking"): + full_response += data["delta"] + _stream_set(session, partial=full_response) yield chunk + elif data.get("type") == "fallback": + # Selected model failed; a fallback answered. + # Forward the notice and remember the real model. 
+ _answered_by = data.get("answered_by") or _answered_by + _actual_model = _actual_model or _answered_by + data["selected_model"] = data.get("selected_model") or _requested_model + yield chunk + elif data.get("type") == "model_actual": + _actual_model = data.get("model") or _actual_model + data["requested_model"] = _requested_model + yield f'data: {json.dumps(data)}\n\n' elif data.get("type") == "usage": last_metrics = data.get("data", {}) - last_metrics["model"] = sess.model + _reported_model = last_metrics.get("model") + last_metrics["requested_model"] = _requested_model + last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model if ctx.context_length and last_metrics.get("input_tokens"): pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0) last_metrics["context_percent"] = pct last_metrics["context_length"] = ctx.context_length + # The frontend reads `tokens_per_second`; the raw usage event + # carries the backend's true gen speed as `gen_tps` (llama.cpp + # timings). Map it through so this direct-chat path shows real + # t/s instead of "n/a" → falling back to a bare token count. + if last_metrics.get("gen_tps") and not last_metrics.get("tokens_per_second"): + last_metrics["tokens_per_second"] = last_metrics["gen_tps"] + last_metrics["tps_source"] = "backend" + # Wall-clock response time for the stats popup ("Time"). 
+ last_metrics.setdefault("response_time", round(time.time() - _chat_start, 2)) yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' except json.JSONDecodeError: yield chunk @@ -741,7 +1057,8 @@ def setup_chat_routes( "tokens_per_second": _tps, "context_percent": _ctx_pct, "context_length": ctx.context_length, - "model": sess.model, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, "usage_source": "estimated", } yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' @@ -753,7 +1070,7 @@ def setup_chat_routes( rag_sources=ctx.rag_sources, research_sources=research_sources, used_memories=ctx.used_memories, - do_research=do_research, + do_research=effective_do_research, incognito=incognito, ) if _saved_id: @@ -763,14 +1080,22 @@ def setup_chat_routes( last_metrics, ctx.uprefs, memory_manager, memory_vector, webhook_manager, incognito=incognito, compare_mode=compare_mode, character_name=ctx.preset.character_name, - owner=_user, + owner=_user, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) _stream_set(session, status="done") yield chunk except (asyncio.CancelledError, GeneratorExit): if full_response: logger.info("Client disconnected mid-stream (chat mode) for session %s, saving partial (%d chars)", session, len(full_response)) - _stopped_content, _stopped_md = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model}) + _stopped_content, _stopped_md = clean_thinking_for_save( + full_response, + { + "stopped": True, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, + }, + ) sess.add_message(ChatMessage("assistant", _stopped_content, metadata=_stopped_md)) if not incognito: session_manager.save_sessions() @@ -781,9 +1106,20 @@ def setup_chat_routes( # ── Agent mode: full agent loop with tools ── _agent_rounds = 0 _agent_tool_calls = 0 + _answered_by = None # set if the selected model 
failed and a fallback answered + _requested_model = sess.model + _actual_model = None try: from src.settings import get_setting + from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS _tool_budget = int(get_setting("agent_max_tool_calls", 0)) + # Per-message round cap from settings; clamp defensively in + # case settings.json was hand-edited to a bad value. + try: + _max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS) + except (TypeError, ValueError): + _max_rounds = _DEFAULT_ROUNDS + _max_rounds = max(1, min(_max_rounds, 200)) async for chunk in stream_agent_loop( sess.endpoint_url, @@ -794,19 +1130,28 @@ def setup_chat_routes( max_tokens=ctx.preset.max_tokens, prompt_type=preset_id, max_tool_calls=_tool_budget, + max_rounds=_max_rounds, context_length=ctx.context_length, active_document=active_doc, session_id=session, disabled_tools=disabled_tools if disabled_tools else None, + tool_policy=tool_policy, owner=_user, fallbacks=_fallback_candidates, + workspace=workspace or None, + plan_mode=plan_mode, + approved_plan=approved_plan or None, ): if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"): try: data = json.loads(chunk[6:]) if "delta" in data: - full_response += data["delta"] - _stream_set(session, partial=full_response) + # Reasoning tokens arrive flagged thinking:true. + # Forward them for the live indicator, but keep + # them out of the saved reply (same as chat mode). 
+ if not data.get("thinking"): + full_response += data["delta"] + _stream_set(session, partial=full_response) yield chunk elif data.get("type") == "web_sources": web_sources = data.get("data", []) @@ -815,15 +1160,33 @@ def setup_chat_routes( "tool_start", "tool_output", "agent_step", "doc_stream_open", "doc_stream_delta", "doc_update", "doc_suggestions", "ui_control", + "rounds_exhausted", + "ask_user", + "plan_update", ): if data.get("type") == "agent_step": _agent_rounds = max(_agent_rounds, data.get("round", 1)) elif data.get("type") == "tool_start": _agent_tool_calls += 1 yield chunk + elif data.get("type") == "fallback": + # Selected model failed; a fallback answered. + # Forward the notice and remember the real + # model so metrics reflect it, not the masked + # selected model. + _answered_by = data.get("answered_by") or _answered_by + _actual_model = _actual_model or _answered_by + data["selected_model"] = data.get("selected_model") or _requested_model + yield chunk + elif data.get("type") == "model_actual": + _actual_model = data.get("model") or _actual_model + data["requested_model"] = _requested_model + yield f'data: {json.dumps(data)}\n\n' elif data.get("type") == "metrics": last_metrics = data.get("data", {}) - last_metrics["model"] = sess.model + _reported_model = last_metrics.get("model") + last_metrics["requested_model"] = last_metrics.get("requested_model") or _requested_model + last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' except json.JSONDecodeError: yield chunk @@ -851,6 +1214,7 @@ def setup_chat_routes( skills_manager=skills_manager, owner=_user, extract_skills=user_requested_agent, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) _stream_set(session, status="done") yield chunk @@ -864,7 +1228,14 @@ def setup_chat_routes( try: if full_response: logger.info("Client disconnected mid-stream for session %s, 
saving partial response (%d chars)", session, len(full_response)) - _stopped_content2, _stopped_md2 = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model}) + _stopped_content2, _stopped_md2 = clean_thinking_for_save( + full_response, + { + "stopped": True, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, + }, + ) sess.add_message(ChatMessage("assistant", _stopped_content2, metadata=_stopped_md2)) if not incognito: session_manager.save_sessions() @@ -883,11 +1254,30 @@ def setup_chat_routes( finally: _active_streams.pop(session, None) - # Run the stream as a DETACHED background task so it survives the client - # closing the tab / navigating away (true terminal-agent behavior). The - # SSE response just subscribes (replay buffered output + live); dropping - # the SSE only removes a subscriber — the run keeps going and saves the - # assistant message on completion regardless. Reconnect via /api/chat/resume. + # Compare panes are short-lived, single-shot generations whose sessions + # exist only to drive that one pane — there's nothing to "resume" and + # the user expects the pane's Stop button (which aborts the fetch, + # closing this SSE) to promptly cancel the upstream LLM call. Detaching + # them would keep burning upstream tokens/compute after the pane is + # stopped or the comparison is abandoned, and would surface a stale + # "still streaming" /resume target for a session nobody will revisit. + # + # So: stream them directly (no agent_runs wrapping). Starlette cancels + # the underlying async generator (raising CancelledError/GeneratorExit + # inside it) as soon as it notices the client disconnected — which the + # mode-specific except blocks above already handle by saving the + # partial response exactly once. This stops the upstream call promptly + # without waiting on the next streamed chunk. 
+ # + # Normal chat/agent streams keep the DETACHED behavior below: they + # survive the client closing the tab / navigating away (true + # terminal-agent semantics). The SSE response just subscribes (replay + # buffered output + live); dropping the SSE only removes a subscriber — + # the run keeps going and saves the assistant message on completion + # regardless. Reconnect via /api/chat/resume. + if compare_mode: + return StreamingResponse(_safe_stream(), media_type="text/event-stream") + agent_runs.start(session, _safe_stream()) return StreamingResponse(agent_runs.subscribe(session), media_type="text/event-stream") @@ -920,11 +1310,15 @@ def setup_chat_routes( _verify_session_owner(request, session_id) # A detached run can still be going even if _active_streams was popped; # report it as active so the client knows to reconnect via /resume. - if session_id not in _active_streams: + # Read once via .get() to avoid a KeyError race between the membership + # check and the indexed read if a sibling stream's finally pops the + # entry in between (same pattern _stream_set already uses). 
+ rec = _active_streams.get(session_id) + if rec is None: if agent_runs.is_active(session_id): return {"status": "streaming", "detached": True} raise HTTPException(404, "No active stream for this session") - return _active_streams[session_id] + return rec # ------------------------------------------------------------------ # # POST /api/inject_context @@ -954,45 +1348,16 @@ def setup_chat_routes( return [] _user = get_current_user(request) - query_term = q.strip() - db = SessionLocal() - try: - base_q = ( - db.query(DBChatMessage, DBSession.name) - .join(DBSession, DBChatMessage.session_id == DBSession.id) - .filter( - DBSession.archived == False, - DBChatMessage.content.ilike(f"%{query_term}%"), - DBChatMessage.role.in_(["user", "assistant"]), - ) + return [ + result.to_dict() + for result in search_session_messages( + q, + limit=limit, + owner=_user, + restrict_owner=_user is not None, + include_legacy_owner=False, ) - if _user: - base_q = base_q.filter(DBSession.owner == _user) - rows = base_q.order_by(DBChatMessage.timestamp.desc()).limit(limit).all() - - results = [] - for msg, session_name in rows: - content = msg.content or "" - lower_content = content.lower() - idx = lower_content.find(query_term.lower()) - if idx == -1: - snippet = content[:120] - else: - start = max(0, idx - 50) - end = min(len(content), idx + len(query_term) + 50) - snippet = ("..." if start > 0 else "") + content[start:end] + ("..." 
if end < len(content) else "") - - results.append({ - "session_id": msg.session_id, - "session_name": session_name or "Untitled", - "role": msg.role, - "content_snippet": snippet, - "timestamp": msg.timestamp.isoformat() if msg.timestamp else None, - }) - - return results - finally: - db.close() + ] # ------------------------------------------------------------------ # # POST /api/rewrite — lightweight rewrite of last AI message (no tools) @@ -1088,7 +1453,7 @@ def setup_chat_routes( db_msg = ( db.query(DBChatMessage) .filter(DBChatMessage.session_id == session_id, DBChatMessage.role == 'assistant') - .order_by(DBChatMessage.created_at.desc()) + .order_by(DBChatMessage.timestamp.desc()) .first() ) if db_msg: diff --git a/routes/chatgpt_subscription_routes.py b/routes/chatgpt_subscription_routes.py new file mode 100644 index 000000000..9c695b371 --- /dev/null +++ b/routes/chatgpt_subscription_routes.py @@ -0,0 +1,170 @@ +"""ChatGPT Subscription device-flow setup routes.""" + +import json +import logging +import uuid +from typing import Dict, Optional + +from fastapi import HTTPException, Request + +from core.database import ModelEndpoint, ProviderAuthSession, SessionLocal, utcnow_naive +from routes.device_flow import ( + DeviceFlowPoll, + DeviceFlowStart, + PendingDeviceFlowStore, + create_device_flow_router, +) +from src.auth_helpers import get_current_user +from src import chatgpt_subscription + +logger = logging.getLogger(__name__) + +_DEVICE_FLOW_STORE = PendingDeviceFlowStore() + + +def _provision_endpoint(tokens: Dict, owner: Optional[str]) -> Dict: + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not access_token or not refresh_token: + raise ValueError("ChatGPT token response was missing access_token or refresh_token") + + base = chatgpt_subscription.DEFAULT_CHATGPT_SUBSCRIPTION_BASE_URL + models = chatgpt_subscription.fetch_available_models(access_token) + if not models: + raise ValueError("ChatGPT Subscription 
connected, but no usable Codex models were discovered for this account.") + db = SessionLocal() + try: + auth = ( + db.query(ProviderAuthSession) + .filter( + ProviderAuthSession.provider == chatgpt_subscription.CHATGPT_SUBSCRIPTION_PROVIDER, + ProviderAuthSession.owner == owner, + ) + .first() + ) + if auth is None: + auth = ProviderAuthSession( + id=str(uuid.uuid4())[:8], + provider=chatgpt_subscription.CHATGPT_SUBSCRIPTION_PROVIDER, + owner=owner, + label="ChatGPT Subscription", + base_url=base, + auth_mode="chatgpt", + ) + db.add(auth) + auth.base_url = base + auth.access_token = access_token + auth.refresh_token = refresh_token + auth.last_refresh = utcnow_naive() + auth.auth_mode = "chatgpt" + + ep = ( + db.query(ModelEndpoint) + .filter( + ModelEndpoint.base_url == base, + ModelEndpoint.provider_auth_id == auth.id, + ModelEndpoint.owner == owner, + ) + .first() + ) + if ep is None: + ep = ModelEndpoint( + id=str(uuid.uuid4())[:8], + name="ChatGPT Subscription", + base_url=base, + model_type="llm", + endpoint_kind="api", + owner=owner, + ) + db.add(ep) + ep.name = "ChatGPT Subscription" + ep.base_url = base + ep.api_key = None + ep.provider_auth_id = auth.id + ep.is_enabled = True + ep.supports_tools = False + ep.model_type = "llm" + ep.endpoint_kind = "api" + ep.model_refresh_mode = "manual" + ep.cached_models = json.dumps(models) + db.commit() + result = { + "id": ep.id, + "name": ep.name, + "base_url": ep.base_url, + "models": models, + } + finally: + db.close() + + try: + from routes.model_routes import _invalidate_models_cache + + _invalidate_models_cache() + except Exception: + pass + return result + + +def _start_device_flow(request: Request, _form) -> DeviceFlowStart: + try: + data = chatgpt_subscription.request_device_code() + except Exception as exc: + raise chatgpt_subscription.to_http_exception(exc) + + device_auth_id = data.get("device_auth_id") + user_code = data.get("user_code") + if not device_auth_id or not user_code: + raise 
HTTPException(502, "ChatGPT did not return a complete device code") + verification_uri = data.get("verification_uri") or f"{chatgpt_subscription.CHATGPT_OAUTH_ISSUER}/codex/device" + return DeviceFlowStart( + pending={ + "device_auth_id": device_auth_id, + "user_code": user_code, + "owner": get_current_user(request) or None, + }, + response={ + "user_code": user_code, + "verification_uri": verification_uri, + }, + interval=int(data.get("interval") or 5), + expires_in=int(data.get("expires_in") or 900), + ) + + +def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll: + try: + data = chatgpt_subscription.poll_device_auth(pending["device_auth_id"], pending["user_code"]) + except Exception as exc: + logger.debug("ChatGPT device poll failed: %s", exc) + return DeviceFlowPoll.pending(str(exc)) + + authorization_code = data.get("authorization_code") + code_verifier = data.get("code_verifier") + if authorization_code and code_verifier: + try: + tokens = chatgpt_subscription.exchange_authorization_code(authorization_code, code_verifier) + result = _provision_endpoint(tokens, pending["owner"]) + except Exception as exc: + logger.exception("ChatGPT Subscription endpoint provisioning failed") + raise chatgpt_subscription.to_http_exception(exc) + return DeviceFlowPoll.authorized(result) + + err = data.get("error") or data.get("status") + if err in ("authorization_pending", "pending", None): + return DeviceFlowPoll.pending() + if err == "slow_down": + return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None) + if err in ("expired_token", "access_denied", "denied"): + return DeviceFlowPoll.failed(err) + return DeviceFlowPoll.pending(err or "unknown") + + +def setup_chatgpt_subscription_routes(): + return create_device_flow_router( + prefix="/api/chatgpt-subscription", + tags=["chatgpt-subscription"], + store=_DEVICE_FLOW_STORE, + start_flow=_start_device_flow, + poll_flow=_poll_device_flow, + ) diff --git a/routes/codex_routes.py 
b/routes/codex_routes.py new file mode 100644 index 000000000..1afac02b9 --- /dev/null +++ b/routes/codex_routes.py @@ -0,0 +1,792 @@ +"""Codex integration routes. + +These are small HTTP surfaces intended for the Codex plugin/MCP bridge. They +reuse existing Odysseus helpers and enforce API-token scopes before touching +user data. +""" + +import asyncio +import json +import zipfile +from io import BytesIO +from pathlib import Path +from typing import Any + +from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Request +from fastapi.responses import StreamingResponse + +from src.auth_helpers import require_authenticated_request, require_user +from src.tool_implementations import do_manage_notes +from src.constants import COOKBOOK_STATE_FILE + + +COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"} +COOKBOOK_LAUNCH_SCOPES = {"cookbook:launch"} +TODO_READ_SCOPES = {"todos:read", "todos:write"} +TODO_WRITE_SCOPES = {"todos:write"} +EMAIL_READ_SCOPES = {"email:read", "email:draft", "email:send"} +EMAIL_DRAFT_SCOPES = {"email:draft", "email:send"} +EMAIL_SEND_SCOPES = {"email:send"} +MEMORY_READ_SCOPES = {"memory:read", "memory:write"} +MEMORY_WRITE_SCOPES = {"memory:write"} +CALENDAR_READ_SCOPES = {"calendar:read", "calendar:write"} +CALENDAR_WRITE_SCOPES = {"calendar:write"} +DOCS_READ_SCOPES = {"documents:read", "documents:write"} +DOCS_WRITE_SCOPES = {"documents:write"} +WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"} + + +async def _as_owner(request: Request, owner: str, fn, *args, **kwargs): + """Run an existing route handler with request.state.current_user temporarily + set to ``owner`` so its internal get_current_user/require_user calls see + the scope-gated owner (not the "api" pseudo-user the bearer middleware sets). + Restores the original value when done. 
Works for sync and async handlers.""" + orig = getattr(request.state, "current_user", None) + orig_api_token = getattr(request.state, "api_token", None) + request.state.current_user = owner + request.state.api_token = False + try: + result = fn(*args, **kwargs) + if asyncio.iscoroutine(result): + result = await result + return result + finally: + request.state.current_user = orig + if orig_api_token is None: + try: + delattr(request.state, "api_token") + except AttributeError: + pass + else: + request.state.api_token = orig_api_token + + +def _scope_owner(request: Request, allowed: set[str]) -> str: + """Return the data owner if the caller is allowed for this Codex action.""" + if getattr(request.state, "api_token", False): + scopes = set(getattr(request.state, "api_token_scopes", []) or []) + if not scopes.intersection(allowed): + required = " or ".join(sorted(allowed)) + raise HTTPException(403, f"API token missing required scope: {required}") + owner = getattr(request.state, "api_token_owner", None) + if not owner: + raise HTTPException(403, "API token has no owner") + return owner + return require_user(request) + + +def _find_endpoint(router: APIRouter | None, method: str, path: str): + if router is None: + return None + for route in getattr(router, "routes", []): + if getattr(route, "path", "") == path and method in getattr(route, "methods", set()): + return route.endpoint + return None + + +def setup_codex_routes( + email_router: APIRouter | None = None, + memory_router: APIRouter | None = None, + calendar_router: APIRouter | None = None, + document_router: APIRouter | None = None, +) -> APIRouter: + router = APIRouter(prefix="/api/codex", tags=["codex"]) + email_list_endpoint = _find_endpoint(email_router, "GET", "/api/email/list") + email_read_endpoint = _find_endpoint(email_router, "GET", "/api/email/read/{uid}") + email_send_endpoint = _find_endpoint(email_router, "POST", "/api/email/send") + email_draft_endpoint = _find_endpoint(email_router, "POST", 
"/api/email/draft") + memory_list_endpoint = _find_endpoint(memory_router, "GET", "/api/memory") + memory_add_endpoint = _find_endpoint(memory_router, "POST", "/api/memory/add") + calendar_list_events = _find_endpoint(calendar_router, "GET", "/api/calendar/events") + calendar_create_event = _find_endpoint(calendar_router, "POST", "/api/calendar/events") + documents_library_endpoint = _find_endpoint(document_router, "GET", "/api/documents/library") + documents_get_endpoint = _find_endpoint(document_router, "GET", "/api/document/{doc_id}") + documents_create_endpoint = _find_endpoint(document_router, "POST", "/api/document") + + @router.get("/capabilities") + def capabilities(request: Request): + token_scopes = set(getattr(request.state, "api_token_scopes", []) or []) + has_token = bool(getattr(request.state, "api_token", False)) + def scoped(allowed): + return bool(token_scopes.intersection(allowed)) if has_token else True + return { + "integration": "codex", + "token_scopes": sorted(token_scopes), + "tools": { + "todos": { + "read": scoped(TODO_READ_SCOPES), + "write": scoped(TODO_WRITE_SCOPES), + "actions": ["list", "add", "update", "delete", "toggle_item"], + }, + "email": { + "read": scoped(EMAIL_READ_SCOPES), + "draft": scoped(EMAIL_DRAFT_SCOPES), + "send": scoped(EMAIL_SEND_SCOPES), + "actions": ["list", "read", "draft", "send"], + }, + "memory": { + "read": scoped(MEMORY_READ_SCOPES), + "write": scoped(MEMORY_WRITE_SCOPES), + "actions": ["list", "add", "delete"], + "available": memory_list_endpoint is not None, + }, + "calendar": { + "read": scoped(CALENDAR_READ_SCOPES), + "write": scoped(CALENDAR_WRITE_SCOPES), + "actions": ["list_events", "create_event", "delete_event"], + "available": calendar_list_events is not None, + }, + "documents": { + "read": scoped(DOCS_READ_SCOPES), + "write": scoped(DOCS_WRITE_SCOPES), + "actions": ["library", "read", "create", "delete"], + "available": documents_library_endpoint is not None, + }, + "cookbook": { + "read": 
scoped(COOKBOOK_READ_SCOPES), + "launch": scoped(COOKBOOK_LAUNCH_SCOPES), + "actions": ["tasks", "servers", "output", "serve", "stop"], + }, + }, + "safety": { + "email_send_requires_confirmation": True, + "destructive_actions_should_confirm": True, + }, + } + + @router.get("/plugin.zip") + def plugin_zip(request: Request): + require_authenticated_request(request) + root = Path(__file__).resolve().parent.parent / "integrations" / "codex" + if not root.exists(): + raise HTTPException(404, "Codex plugin bundle not found") + buf = BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for path in sorted(root.rglob("*")): + if path.is_dir() or "__pycache__" in path.parts or path.suffix == ".pyc": + continue + zf.write(path, Path("odysseus") / path.relative_to(root)) + buf.seek(0) + headers = {"Content-Disposition": 'attachment; filename="odysseus-codex-plugin.zip"'} + return StreamingResponse(buf, media_type="application/zip", headers=headers) + + @router.get("/todos") + async def list_todos(request: Request, archived: bool = False, label: str | None = None): + owner = _scope_owner(request, TODO_READ_SCOPES) + args: dict[str, Any] = {"action": "list", "archived": archived} + if label: + args["label"] = label + return await do_manage_notes(json.dumps(args), owner=owner) + + @router.post("/todos") + async def manage_todos(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + action = str(body.get("action") or "add").replace("-", "_").strip().lower() + allowed = TODO_WRITE_SCOPES if action in WRITE_ACTIONS else TODO_READ_SCOPES + owner = _scope_owner(request, allowed) + args = dict(body) + args["action"] = action + return await do_manage_notes(json.dumps(args), owner=owner) + + @router.get("/emails") + async def list_emails( + request: Request, + folder: str = "INBOX", + limit: int = 10, + offset: int = 0, + filter: str = "all", + from_addr: str | None = None, + account_id: str | None = None, + has_attachments: int = 0, + 
): + owner = _scope_owner(request, EMAIL_READ_SCOPES) + if email_list_endpoint is None: + raise HTTPException(503, "Email integration is not available") + limit = max(1, min(int(limit or 10), 50)) + offset = max(0, int(offset or 0)) + if account_id: + from routes.email_helpers import _assert_owns_account + + _assert_owns_account(account_id, owner) + return await email_list_endpoint( + folder=folder, + limit=limit, + offset=offset, + filter=filter, + from_addr=from_addr, + account_id=account_id, + has_attachments=has_attachments, + cache_bust=None, + owner=owner, + ) + + @router.get("/emails/{uid}") + async def read_email( + request: Request, + uid: str, + folder: str = "INBOX", + account_id: str | None = None, + mark_seen: bool = False, + ): + owner = _scope_owner(request, EMAIL_READ_SCOPES) + if email_read_endpoint is None: + raise HTTPException(503, "Email integration is not available") + if account_id: + from routes.email_helpers import _assert_owns_account + + _assert_owns_account(account_id, owner) + return await email_read_endpoint( + uid=uid, + folder=folder, + account_id=account_id, + mark_seen=mark_seen, + owner=owner, + ) + + # ── Email draft + send ──────────────────────────────────────────────── + # Both handlers in routes/email_routes.py already accept `owner=` via + # FastAPI Depends, so we call them directly without patching state. 
+ + @router.post("/emails/draft") + async def codex_email_draft(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + owner = _scope_owner(request, EMAIL_DRAFT_SCOPES) + if email_draft_endpoint is None: + raise HTTPException(503, "Email integration is not available") + from routes.email_routes import SendEmailRequest + + try: + req = SendEmailRequest(**body) + except Exception as exc: + raise HTTPException(400, f"Invalid draft payload: {exc}") + return await email_draft_endpoint(req=req, owner=owner) + + @router.post("/emails/send") + async def codex_email_send(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + owner = _scope_owner(request, EMAIL_SEND_SCOPES) + if email_send_endpoint is None: + raise HTTPException(503, "Email integration is not available") + from routes.email_routes import SendEmailRequest + + try: + req = SendEmailRequest(**body) + except Exception as exc: + raise HTTPException(400, f"Invalid send payload: {exc}") + return await email_send_endpoint(req=req, background_tasks=BackgroundTasks(), owner=owner) + + # ── Memory ──────────────────────────────────────────────────────────── + + @router.get("/memory") + async def codex_memory_list(request: Request): + owner = _scope_owner(request, MEMORY_READ_SCOPES) + if memory_list_endpoint is None: + raise HTTPException(503, "Memory integration is not available") + return await _as_owner(request, owner, memory_list_endpoint, request) + + @router.post("/memory") + async def codex_memory_add(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + owner = _scope_owner(request, MEMORY_WRITE_SCOPES) + if memory_add_endpoint is None: + raise HTTPException(503, "Memory integration is not available") + from src.request_models import MemoryAddRequest + + try: + memory_data = MemoryAddRequest( + text=str(body.get("text") or "").strip(), + category=body.get("category", "fact"), + source=body.get("source", "user"), + session_id=body.get("session_id"), + ) + 
except Exception as exc: + raise HTTPException(400, f"Invalid memory payload: {exc}") + if not memory_data.text: + raise HTTPException(400, "Empty memory text") + return await _as_owner(request, owner, memory_add_endpoint, request, memory_data) + + # ── Calendar ────────────────────────────────────────────────────────── + + @router.get("/calendar/events") + async def codex_calendar_list(request: Request, start: str, end: str, calendar: str = ""): + owner = _scope_owner(request, CALENDAR_READ_SCOPES) + if calendar_list_events is None: + raise HTTPException(503, "Calendar integration is not available") + return await _as_owner(request, owner, calendar_list_events, request, start, end, calendar) + + @router.post("/calendar/events") + async def codex_calendar_create(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + owner = _scope_owner(request, CALENDAR_WRITE_SCOPES) + if calendar_create_event is None: + raise HTTPException(503, "Calendar integration is not available") + from routes.calendar_routes import EventCreate + + try: + data = EventCreate(**body) + except Exception as exc: + raise HTTPException(400, f"Invalid event payload: {exc}") + return await _as_owner(request, owner, calendar_create_event, request, data) + + # ── Documents ───────────────────────────────────────────────────────── + + @router.get("/documents") + async def codex_documents_library( + request: Request, + search: str | None = None, + language: str | None = None, + sort: str = "recent", + offset: int = 0, + limit: int = 50, + archived: bool = False, + ): + owner = _scope_owner(request, DOCS_READ_SCOPES) + if documents_library_endpoint is None: + raise HTTPException(503, "Documents integration is not available") + return await _as_owner( + request, owner, documents_library_endpoint, + request, search, language, sort, offset, limit, archived, + ) + + @router.get("/documents/{doc_id}") + async def codex_documents_get(request: Request, doc_id: str): + owner = 
_scope_owner(request, DOCS_READ_SCOPES) + if documents_get_endpoint is None: + raise HTTPException(503, "Documents integration is not available") + return await _as_owner(request, owner, documents_get_endpoint, request, doc_id) + + # ── DELETE endpoints so agents can clean up after themselves ────────── + + memory_delete_endpoint = _find_endpoint(memory_router, "DELETE", "/api/memory/{memory_id}") + calendar_delete_event = _find_endpoint(calendar_router, "DELETE", "/api/calendar/events/{uid}") + documents_delete_endpoint = _find_endpoint(document_router, "DELETE", "/api/document/{doc_id}") + + @router.delete("/memory/{memory_id}") + async def codex_memory_delete(request: Request, memory_id: str): + owner = _scope_owner(request, MEMORY_WRITE_SCOPES) + if memory_delete_endpoint is None: + raise HTTPException(503, "Memory delete not available") + return await _as_owner(request, owner, memory_delete_endpoint, request, memory_id) + + @router.delete("/calendar/events/{uid}") + async def codex_calendar_delete(request: Request, uid: str): + owner = _scope_owner(request, CALENDAR_WRITE_SCOPES) + if calendar_delete_event is None: + raise HTTPException(503, "Calendar delete not available") + return await _as_owner(request, owner, calendar_delete_event, request, uid) + + @router.delete("/documents/{doc_id}") + async def codex_documents_delete(request: Request, doc_id: str): + owner = _scope_owner(request, DOCS_WRITE_SCOPES) + if documents_delete_endpoint is None: + raise HTTPException(503, "Documents delete not available") + return await _as_owner(request, owner, documents_delete_endpoint, request, doc_id) + + @router.post("/documents") + async def codex_documents_create(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + owner = _scope_owner(request, DOCS_WRITE_SCOPES) + if documents_create_endpoint is None: + raise HTTPException(503, "Documents integration is not available") + from routes.document_routes import DocumentCreate + + try: + req = 
DocumentCreate(**body) + except Exception as exc: + raise HTTPException(400, f"Invalid document payload: {exc}") + return await _as_owner(request, owner, documents_create_endpoint, request, req) + + # ── Cookbook surface ── + # Lets the agent run the same launch / monitor / kill loop the user + # would do by hand in the Cookbook UI: read the current task list + + # tmux output, launch a serve task, stop one. Two scopes: + # cookbook:read — list tasks + tail output + list servers + # cookbook:launch — also start/stop serves (host shell exec) + # `cookbook:launch` is genuinely powerful: /api/model/serve runs SSH'd + # commands on the user's hosts. The existing _validate_serve_cmd + # allowlist (vllm/python3/sglang/llama-server/etc., no shell metachars) + # keeps the agent inside the same sandbox the UI uses. + + async def _run_shell(cmd: str, timeout: float = 15.0) -> dict: + """Run a shell command, return {exit_code, stdout, stderr}.""" + import asyncio as _asyncio + try: + proc = await _asyncio.create_subprocess_shell( + cmd, + stdout=_asyncio.subprocess.PIPE, + stderr=_asyncio.subprocess.PIPE, + ) + try: + stdout_b, stderr_b = await _asyncio.wait_for(proc.communicate(), timeout=timeout) + except _asyncio.TimeoutError: + proc.kill() + return {"exit_code": -1, "stdout": "", "stderr": "timed out"} + return { + "exit_code": proc.returncode, + "stdout": stdout_b.decode(errors="replace"), + "stderr": stderr_b.decode(errors="replace"), + } + except Exception as exc: + return {"exit_code": -1, "stdout": "", "stderr": str(exc)} + + def _read_cookbook_state() -> dict: + from pathlib import Path as _Path + import json as _json + p = _Path(COOKBOOK_STATE_FILE) + if not p.exists(): + return {} + try: + return _json.loads(p.read_text(encoding="utf-8")) + except Exception: + return {} + + def _redact_task(t: dict) -> dict: + """Strip secrets before returning to the agent.""" + clean = {k: v for k, v in t.items() if k not in ("hf_token", "_secrets")} + if 
isinstance(clean.get("payload"), dict): + pl = clean["payload"] + clean["payload"] = {k: v for k, v in pl.items() + if k not in ("hf_token", "_secrets")} + return clean + + @router.get("/cookbook/tasks") + async def codex_cookbook_tasks(request: Request): + _scope_owner(request, COOKBOOK_READ_SCOPES) + state = _read_cookbook_state() + tasks = state.get("tasks") or [] + return {"tasks": [_redact_task(t) for t in tasks]} + + @router.get("/cookbook/servers") + async def codex_cookbook_servers(request: Request): + _scope_owner(request, COOKBOOK_READ_SCOPES) + state = _read_cookbook_state() + servers = state.get("env", {}).get("servers") or [] + # Strip ssh creds / passwords; keep only what's needed to pick a host. + cleaned = [] + for s in servers: + cleaned.append({ + "name": s.get("name"), + "host": s.get("host"), + "port": s.get("port"), + "env": s.get("env"), + "envPath": s.get("envPath"), + "platform": s.get("platform"), + "modelDirs": s.get("modelDirs"), + }) + return {"servers": cleaned} + + @router.get("/cookbook/output/{session_id}") + async def codex_cookbook_output(request: Request, session_id: str, tail: int = 400): + _scope_owner(request, COOKBOOK_READ_SCOPES) + # Defensive: session_id must be the tmux-style id we issue + # (`serve-XXXX` / `cookbook-XXXX` / `queue-XXXX`); anything else + # would let the agent run arbitrary `tmux capture-pane` targets. + import re as _re + if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id): + raise HTTPException(400, "Invalid session id") + tail = max(20, min(int(tail or 400), 4000)) + # Resolve the task's host (if any) from cookbook state so we can + # ssh to the right box, exactly as the UI does in _reconnectTask. 
+ state = _read_cookbook_state() + tasks = state.get("tasks") or [] + task = next((t for t in tasks if t.get("sessionId") == session_id), None) + if task is None: + raise HTTPException(404, "task not found") + host = (task.get("remoteHost") or "").strip() + ssh_port = (task.get("sshPort") or "").strip() + # Prefer the persisted log file over the tmux pane. The pane gets + # overwritten by the post-crash neofetch banner + bash prompt the + # moment vllm exits; the log file is the raw stdout/stderr and + # survives unchanged. Falls back to pane for older tasks predating + # the tee-to-log runner change. + log_path = f"/tmp/odysseus-tmux/{session_id}.log" + inner = ( + f"if [ -s {log_path} ]; then tail -n {tail} {log_path}; " + f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi" + ) + if host: + port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" + import shlex + cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}" + else: + cmd = inner + result = await _run_shell(cmd, timeout=15) + return { + "session_id": session_id, + "host": host or "local", + "exit_code": result.get("exit_code"), + "output": result.get("stdout", ""), + "task": _redact_task(task), + } + + @router.post("/cookbook/serve") + async def codex_cookbook_serve(request: Request, body: dict[str, Any] = Body(default_factory=dict)): + _scope_owner(request, COOKBOOK_LAUNCH_SCOPES) + # Wraps /api/model/serve with the SAME validation the UI uses. + # _validate_serve_cmd (called inside model_serve) rejects shell + # metachars and requires the leading binary to be in the + # cookbook allowlist (vllm / python3 / sglang / llama-server / ...). + from routes.cookbook_helpers import ServeRequest + # Accept friendly aliases agents naturally reach for. Without these, + # passing `host` silently maps to nothing and the serve runs LOCAL + # instead of on the intended remote — exactly the bug an agent + # would never debug on its own. 
+ norm = dict(body or {}) + if "host" in norm and "remote_host" not in norm: + norm["remote_host"] = norm.pop("host") + if "model" in norm and "repo_id" not in norm: + norm["repo_id"] = norm.pop("model") + if "ssh_port" not in norm and "port" in norm and (str(norm.get("port") or "").isdigit() and int(norm["port"]) >= 1000): + # Heuristic: if `port` looks like an SSH port (≥1000) and there's + # no explicit ssh_port, treat it as such. UI ports (8000, 8001, + # 30000) belong inside the cmd string, not here. + pass # leave as-is — user's `port` here is ambiguous; skip remap. + try: + req = ServeRequest(**norm) + except Exception as exc: + raise HTTPException(400, f"Invalid serve payload: {exc}") + serve_endpoint = _find_endpoint(None, "POST", "/api/model/serve") + # Fall back to importing from the cookbook router registered on app. + if serve_endpoint is None: + from fastapi import FastAPI + app: FastAPI = request.app + for route in app.routes: + if getattr(route, "path", None) == "/api/model/serve" and "POST" in getattr(route, "methods", set()): + serve_endpoint = route.endpoint + break + if serve_endpoint is None: + raise HTTPException(503, "model serve endpoint unavailable") + return await serve_endpoint(request, req) + + @router.post("/cookbook/stop/{session_id}") + async def codex_cookbook_stop(request: Request, session_id: str): + _scope_owner(request, COOKBOOK_LAUNCH_SCOPES) + import re as _re + if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id): + raise HTTPException(400, "Invalid session id") + state = _read_cookbook_state() + tasks = state.get("tasks") or [] + task = next((t for t in tasks if t.get("sessionId") == session_id), None) + host = ((task or {}).get("remoteHost") or "").strip() + ssh_port = ((task or {}).get("sshPort") or "").strip() + if host: + port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" + cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\"" + else: + cmd = f"tmux kill-session -t {session_id}" + result 
= await _run_shell(cmd, timeout=10) + return {"session_id": session_id, "exit_code": result.get("exit_code"), "host": host or "local"} + + @router.get("/cookbook/cached") + async def codex_cookbook_cached(request: Request, host: str | None = None): + """List cached models on a configured server (or local if host is omitted). + Mirrors `list_cached_models` from the chat agent so external agents have + the same inventory view before deciding what to serve/download.""" + _scope_owner(request, COOKBOOK_READ_SCOPES) + # Hit /api/model/cached internally, with the same modelDirs the chat + # agent's list_cached_models would resolve from cookbook state. + state = _read_cookbook_state() + env = state.get("env") if isinstance(state, dict) else {} + servers = (env.get("servers") if isinstance(env, dict) else None) or [] + HF_DEFAULTS = {"~/.cache/huggingface/hub", "~/.cache/huggingface"} + def _dirs_for(srv: dict) -> str: + mds = srv.get("modelDirs") if isinstance(srv, dict) else None + if isinstance(mds, list): + extras = [d for d in mds if isinstance(d, str) and d.strip() and d.strip() not in HF_DEFAULTS] + return ",".join(extras) + if isinstance(mds, str) and mds.strip() not in HF_DEFAULTS: + return mds + return "" + # Resolve friendly host name → real host (matches list_cached_models flow). 
@router.get("/cookbook/presets")
async def codex_cookbook_presets(request: Request):
    """Return the serve presets saved in cookbook state.

    Counterpart to `list_serve_presets`. Call it before composing a `serve`
    body: a saved preset usually already carries a working cmd. Each preset is
    reduced to name / model / host / port / cmd; entries that are not dicts
    are skipped. ``default_host`` is the ``defaultServer`` value from the
    state's ``env`` section, or ``""`` when absent.
    """
    _scope_owner(request, COOKBOOK_READ_SCOPES)
    state = _read_cookbook_state()
    saved = state.get("presets") or []
    summaries = [
        {
            "name": entry.get("name"),
            "model": entry.get("model") or entry.get("modelId"),
            "host": entry.get("host") or entry.get("remoteHost"),
            "port": entry.get("port"),
            "cmd": entry.get("cmd"),
        }
        for entry in saved
        if isinstance(entry, dict)
    ]
    env = state.get("env") or {}
    return {"presets": summaries, "default_host": env.get("defaultServer", "")}
@router.post("/cookbook/adopt")
async def codex_cookbook_adopt(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
    """Adopt an existing tmux session into cookbook tracking.

    Needed when serve_model rejects a cmd and the agent falls back to direct
    ssh + tmux: without adoption the session is invisible to the UI.
    Body: {tmux_session, model, host?, port?} (``session_id``, ``repo_id``
    and ``remote_host`` are accepted as aliases).

    Raises:
        HTTPException: 400 for a missing or malformed session, model, host or
            port; 404 when the tmux session is not found on the target;
            500 when the state file cannot be written.

    Returns:
        ``{"ok": True, "session_id", "host"}``, or
        ``{"ok": True, "already_tracked": True, "session_id"}`` when the
        session is already present in state.
    """
    _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
    norm = dict(body or {})

    def _text(*keys: str) -> str:
        """Return the first truthy value among ``keys`` as a stripped string."""
        for key in keys:
            value = norm.get(key)
            if value:
                # str(): a JSON number or bool must not crash `.strip()`.
                return str(value).strip()
        return ""

    sess = _text("tmux_session", "session_id")
    model = _text("model", "repo_id")
    host = _text("host", "remote_host")
    import re as _re
    if not sess or not _re.fullmatch(r"[a-zA-Z0-9_-]+", sess):
        raise HTTPException(400, "tmux_session required, [a-zA-Z0-9_-]+ only")
    if not model:
        raise HTTPException(400, "model required")
    if host.startswith("-"):
        # Quoting does not stop ssh from reading a leading dash as an option.
        raise HTTPException(400, "Invalid host")
    # Parse the port before any remote call, so junk is a 400 rather than a
    # 500 raised after the session has already been probed.
    try:
        port = int(norm.get("port") or 8000)
    except (TypeError, ValueError):
        raise HTTPException(400, "port must be an integer")
    # Verify the tmux session exists on the target host before adopting.
    import shlex
    if host:
        check = f"ssh {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)}'"
    else:
        check = f"tmux has-session -t {shlex.quote(sess)}"
    chk = await _run_shell(check, timeout=8)
    if chk.get("exit_code") not in (0, None):
        raise HTTPException(404, f"tmux session {sess!r} not found on {host or 'local'}")
    # Write into cookbook_state.json.
    import time as _t, json as _json
    from core.atomic_io import atomic_write_json
    from pathlib import Path as _Path
    cookbook_state_path = _Path(COOKBOOK_STATE_FILE)
    try:
        state = _json.loads(cookbook_state_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing, unreadable or undecodable state starts from empty.
        state = {}
    if not isinstance(state, dict):
        state = {}
    tasks = state.get("tasks")
    if not isinstance(tasks, list):
        tasks = state["tasks"] = []
    if any(isinstance(t, dict) and t.get("sessionId") == sess for t in tasks):
        return {"ok": True, "already_tracked": True, "session_id": sess}
    tasks.append({
        "id": sess, "sessionId": sess,
        "name": model.split("/")[-1] if "/" in model else model,
        "type": "serve", "status": "running",
        "output": f"Adopted externally-launched session {sess!r} on {host or 'local'}.",
        "ts": int(_t.time() * 1000),
        "payload": {"repo_id": model, "remote_host": host, "_cmd": "(adopted — launched outside cookbook)", "port": port},
        "remoteHost": host, "sshPort": "", "platform": "linux",
        "_serveReady": False, "_endpointAdded": False, "_adoptedExternally": True,
    })
    try:
        atomic_write_json(cookbook_state_path, state)
    except Exception as exc:
        raise HTTPException(500, f"state write failed: {exc}") from exc
    return {"ok": True, "session_id": sess, "host": host or "local"}
+ """ + router = APIRouter(prefix="/api/claude", tags=["claude"]) + + @router.get("/plugin.zip") + def plugin_zip(request: Request): + require_authenticated_request(request) + # Only ship the skills/ subtree so extracting at ~/.claude/ doesn't dump + # README.md or other bundle metadata into the user's claude config dir. + skills_root = Path(__file__).resolve().parent.parent / "integrations" / "claude" / "skills" + if not skills_root.exists(): + raise HTTPException(404, "Claude skill bundle not found") + bundle_root = skills_root.parent + buf = BytesIO() + with zipfile.ZipFile(buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for path in sorted(skills_root.rglob("*")): + if path.is_dir() or "__pycache__" in path.parts or path.suffix == ".pyc": + continue + zf.write(path, path.relative_to(bundle_root)) + buf.seek(0) + headers = {"Content-Disposition": 'attachment; filename="odysseus-claude-skill.zip"'} + return StreamingResponse(buf, media_type="application/zip", headers=headers) + + return router diff --git a/routes/compare_routes.py b/routes/compare_routes.py index 2d06e95a1..ad42f1a89 100644 --- a/routes/compare_routes.py +++ b/routes/compare_routes.py @@ -12,12 +12,51 @@ import logging from core.database import Comparison, SessionLocal from core.session_manager import SessionManager from src.auth_helpers import get_current_user +from routes.session_routes import _reject_raw_endpoint_url_for_non_admin logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/compare", tags=["compare"]) +def _owned_endpoint_by_url(db, base_url, owner): + """ModelEndpoint whose base_url == `base_url` and is VISIBLE to `owner` + (their own rows + legacy null-owner "shared" rows); None otherwise. + + Owner-scoped on purpose. ModelEndpoint is per-user (core/database.py: non-null + owner = private, "the model picker only shows the endpoint to that user") and + holds a decrypted `api_key`. 
def _owned_endpoint_by_id(db, endpoint_id, owner):
    """Fetch the ModelEndpoint with id ``endpoint_id`` if ``owner`` may see it.

    The id query is narrowed by ``owner_filter`` (the caller's own rows plus
    legacy null-owner "shared" rows; a null/empty owner is a no-op), and the
    first match is returned, or None.

    Preferred over URL matching for credential resolution: two visible
    endpoints can share a base_url yet hold different api_keys, and a
    base_url-only lookup returns whichever row sorts first. An id pins the
    exact registered endpoint, so /api/compare/start uses it when supplied
    and falls back to URL matching only for legacy / admin raw-URL callers.
    """
    from core.database import ModelEndpoint
    from src.auth_helpers import owner_filter

    by_id = db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id)
    visible = owner_filter(by_id, ModelEndpoint, owner)
    return visible.first()
+98,105 @@ def setup_compare_routes(session_manager: SessionManager): else: mapping = {"left": "a", "right": "b"} + # Map session IDs to left/right based on blind mapping + session_left = sid_a if mapping["left"] == "a" else sid_b + session_right = sid_a if mapping["right"] == "a" else sid_b + + # In blind mode, name the helper sessions by their neutral slot + # ("Model A" / "Model B") instead of the real model. Otherwise the + # session name leaks the model in the sidebar and GET /api/sessions, + # de-anonymizing the comparison before the user votes (issue #1285). + slot_name = {session_left: "Model A", session_right: "Model B"} + + # SECURITY: resolve and validate BOTH endpoints before creating any + # session. Compare copies a registered endpoint's Authorization header + # into the [CMP] session, so validating one endpoint while creating its + # session, then rejecting the other, would leave a partial compare + # session behind with that header attached. Doing all the owner-scope + # resolution + raw-URL rejection up front means a 403 on either endpoint + # aborts the whole request with nothing created and no header copied. + from src.endpoint_resolver import build_chat_url, build_headers, normalize_base + resolved = [] + db = SessionLocal() + try: + for sid, model, endpoint, endpoint_id in [ + (sid_a, model_a, endpoint_a, endpoint_a_id), + (sid_b, model_b, endpoint_b, endpoint_b_id), + ]: + # Prefer an explicit endpoint id: it pins the EXACT registered + # endpoint (and its api_key), even when two endpoints visible to + # the caller share a base_url with different keys — a URL-only + # match would copy whichever row sorts first, i.e. possibly the + # wrong key. Fall back to URL resolution only for legacy / admin + # raw-URL callers that don't send an id. 
+ eid = endpoint_id.strip() if isinstance(endpoint_id, str) else "" + if eid: + ep = _owned_endpoint_by_id(db, eid, user) + if ep is None: + # An id the caller can't see (wrong owner / deleted) must + # NOT silently fall back to a same-URL row with a different + # key — that's exactly the mix-up ids exist to prevent. + raise HTTPException(404, "Model endpoint not found") + # The id already resolved the endpoint; ignore any raw URL the + # caller also sent and dial the stored config instead. + endpoint = ep.base_url + elif not endpoint: + raise HTTPException( + 422, "endpoint_a/endpoint_b or endpoint_a_id/endpoint_b_id is required" + ) + else: + # Resolve the supplied URL to a ModelEndpoint the caller owns + # (their own rows + legacy null-owner shared rows), scoped so a + # comparison can't borrow another user's private endpoint key. + base = normalize_base(endpoint) + ep = _owned_endpoint_by_url(db, base, user) + # Reject *unregistered* raw URLs for signed-in non-admins; a + # matched registered endpoint supplies an id so the caller can + # still compare endpoints they own. Blanket-rejecting here (the + # earlier `endpoint_id=None` call) locked non-admins out of + # compare entirely, since compare resolves endpoints by URL with + # no endpoint_id. Mirrors the gallery inpaint/harmonize checks. + # Raised here (phase 1), before any session exists. + _reject_raw_endpoint_url_for_non_admin( + request, user, str(ep.id) if ep is not None else None, endpoint + ) + # Bind the [CMP] session to the RESOLVED endpoint, not the raw + # caller-supplied string. When the URL matches a registered + # endpoint visible to the caller, use that row's own normalized + # base URL (the same value owner scoping + endpoint validation + # already vetted) so the session dials exactly where the stored + # config points. 
The raw `endpoint` only survives for callers + # allowed to pass one — admins / single-user mode, where + # `_reject_raw_endpoint_url_for_non_admin` is a no-op and `ep` + # is None. Mirrors the registered-endpoint path in session_routes. + session_endpoint_url = ( + build_chat_url(normalize_base(ep.base_url)) if ep is not None else endpoint + ) + # Headers come only from a matched endpoint's key; None when + # `ep` is None (raw admin URL or no match), so a comparison can + # never inherit another user's key/headers. + headers = build_headers(ep.api_key, ep.base_url) if (ep and ep.api_key) else None + resolved.append((sid, model, session_endpoint_url, headers)) + finally: + db.close() + + # Both endpoints validated — only now create the ephemeral [CMP] + # sessions and copy any resolved headers. + for sid, model, session_endpoint_url, headers in resolved: + name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}" + session_manager.create_session( + session_id=sid, + name=name, + endpoint_url=session_endpoint_url, + model=model, + rag=False, + owner=user, + ) + if headers: + s = session_manager.sessions.get(sid) + if s: + s.headers = headers + # Store comparison record db = SessionLocal() try: @@ -92,8 +205,12 @@ def setup_compare_routes(session_manager: SessionManager): prompt=prompt, model_a=model_a, model_b=model_b, - endpoint_a=endpoint_a, - endpoint_b=endpoint_b, + # Record the URL the session actually dials. For URL callers this + # is their raw input; for id-only callers (empty endpoint_a/_b) + # fall back to the resolved endpoint URL so the column stays + # meaningful and non-null. resolved is in [a, b] order. 
+ endpoint_a=endpoint_a or resolved[0][2], + endpoint_b=endpoint_b or resolved[1][2], is_blind=blind, blind_mapping=json.dumps(mapping), owner=user, @@ -103,18 +220,18 @@ def setup_compare_routes(session_manager: SessionManager): finally: db.close() - # Map session IDs to left/right based on blind mapping - session_left = sid_a if mapping["left"] == "a" else sid_b - session_right = sid_a if mapping["right"] == "a" else sid_b - + # In blind mode, withhold the model identities AND the left/right + # mapping from the response. The client already knows model_a/model_b + # (it sent them), so returning either would defeat blind mode. They are + # revealed by POST /api/compare/{id}/vote once the user has voted (#1285). return { "id": comp_id, "session_left": session_left, "session_right": session_right, - "model_left": model_a if mapping["left"] == "a" else model_b, - "model_right": model_a if mapping["right"] == "a" else model_b, + "model_left": None if blind else (model_a if mapping["left"] == "a" else model_b), + "model_right": None if blind else (model_a if mapping["right"] == "a" else model_b), "is_blind": blind, - "mapping": mapping, + "mapping": None if blind else mapping, } @router.post("/{comp_id}/vote") diff --git a/routes/contacts_routes.py b/routes/contacts_routes.py index 8db546308..e4e8ce759 100644 --- a/routes/contacts_routes.py +++ b/routes/contacts_routes.py @@ -11,20 +11,24 @@ import uuid import json import csv import io +import os import httpx from pathlib import Path from datetime import datetime -from fastapi import APIRouter, Query, Depends, Response +from urllib.parse import urljoin, urlparse, urlunparse + +from fastapi import APIRouter, Query, Depends, Response, HTTPException from typing import List, Dict, Optional -from src.auth_helpers import require_user from core.middleware import require_admin +from src.url_safety import check_outbound_url logger = logging.getLogger(__name__) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" 
def _validate_carddav_url(url: str) -> str:
    """Clean a CardDAV URL and vet it with ``check_outbound_url``.

    Non-string input is treated as an empty string. Surrounding whitespace
    and trailing slashes are removed before the check. Private-address
    blocking is requested only when the ``CARDDAV_BLOCK_PRIVATE_IPS``
    environment variable equals ``true`` (case-insensitive; default off).

    Returns:
        The cleaned URL.

    Raises:
        ValueError: when ``check_outbound_url`` rejects the URL.
    """
    text = url if isinstance(url, str) else ""
    candidate = text.strip().rstrip("/")
    block_private = os.getenv("CARDDAV_BLOCK_PRIVATE_IPS", "false").lower() == "true"
    accepted, why = check_outbound_url(candidate, block_private=block_private)
    if accepted:
        return candidate
    raise ValueError(f"Rejected CardDAV URL: {why}")


def _carddav_base_url(cfg: Dict) -> str:
    """Return the validated CardDAV base URL from ``cfg["url"]``.

    A missing or falsy ``url`` is validated as the empty string.
    """
    configured = cfg.get("url") or ""
    return _validate_carddav_url(configured)
def _abs_url(href: str) -> str:
    """Resolve a multistatus href to a full URL on the configured CardDAV server.

    ``href`` is normally an absolute path such as ``/user/contacts/x.vcf``
    and is joined onto the configured collection URL. If the joined result
    has a different scheme or host than the configured server, only its path
    and query are kept and re-attached to the configured origin, so a
    malicious CardDAV response cannot redirect later writes/deletes to
    another host. The final URL is passed through ``_validate_carddav_url``.
    """
    configured = _carddav_base_url(_get_carddav_config())
    origin = urlparse(configured)[:2]  # (scheme, netloc)
    candidate = urljoin(configured.rstrip("/") + "/", href or "")
    parts = urlparse(candidate)
    if parts[:2] != origin:
        # Cross-origin href: keep path and query, force the configured origin.
        candidate = urlunparse((origin[0], origin[1], parts.path or "/", "", parts.query, ""))
    return _validate_carddav_url(candidate)
"failed": 0, "total": 0, "error": str(e)} auth = (cfg["username"], cfg["password"]) if cfg["username"] else None # Split into individual cards. re.split drops the BEGIN line, so we # re-add it. Normalize CRLF. @@ -434,7 +470,7 @@ def _import_vcards(text: str) -> Dict: elif not re.search(r"^VERSION:", block, re.MULTILINE): block = block.replace("BEGIN:VCARD", "BEGIN:VCARD\nVERSION:4.0", 1) vcard = block.replace("\n", "\r\n") + "\r\n" - url = cfg["url"].rstrip("/") + "/" + quote(uid, safe="") + ".vcf" + url = base_url + "/" + quote(uid, safe="") + ".vcf" try: r = httpx.put( url, data=vcard.encode("utf-8"), @@ -594,8 +630,8 @@ def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) - vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones) # Use the real resource href (handles externally-created contacts whose # filename != UID); falls back to the .vcf guess. - url = _resolve_resource_url(uid) try: + url = _resolve_resource_url(uid) auth = (cfg["username"], cfg["password"]) if cfg["username"] else None r = httpx.put( url, @@ -623,8 +659,8 @@ def _delete_contact(uid: str) -> bool: _save_local_contacts(remaining) return True - url = _resolve_resource_url(uid) try: + url = _resolve_resource_url(uid) auth = (cfg["username"], cfg["password"]) if cfg["username"] else None r = httpx.delete(url, auth=auth, timeout=10) if r.status_code in (200, 204): @@ -676,8 +712,8 @@ def setup_contacts_routes(): @router.post("/add") async def add_contact(data: dict, _admin: str = Depends(require_admin)): """Add a new contact.""" - name = data.get("name", "").strip() - email = data.get("email", "").strip() + name = (data.get("name") or "").strip() + email = (data.get("email") or "").strip() if not email: return {"success": False, "error": "Email required"} # Check if already exists @@ -740,7 +776,13 @@ def setup_contacts_routes(): settings = _load_settings() for key in ("carddav_url", "carddav_username", "carddav_password"): if key in data: - settings[key] 
= data[key] + if key == "carddav_url" and str(data[key] or "").strip(): + try: + settings[key] = _validate_carddav_url(data[key]) + except ValueError as e: + raise HTTPException(400, str(e)) + else: + settings[key] = data[key] _save_settings(settings) # Force re-fetch _contact_cache["fetched_at"] = None diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index a8412d54a..39a18f715 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -2,19 +2,32 @@ Extracted from cookbook_routes.py; the routes module imports the symbols it needs.""" import logging +import ntpath import os +import posixpath import re import shlex from fastapi import HTTPException from pydantic import BaseModel +from core.platform_compat import _ssh_exec_argv + logger = logging.getLogger(__name__) # HuggingFace repo IDs are /, both alphanumerics plus ._- # Rejecting anything else up front closes off shell-interpolation vectors. _REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*$") +# Cached models scanned from a custom/local model dir are keyed by their leaf +# folder name (no slash), e.g. `DeepSeek-R1-UD-IQ4_XS`. The serve command uses +# the real on-disk path separately; this identifier is only for UI/task +# bookkeeping, so serving should accept the same safe glyph set as repo IDs. +_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") +# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`. +# Some registries also use a namespace path. Keep this shell-safe: no spaces, +# quotes, `$`, `;`, `&`, pipes, or redirects. +_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$") # Include pattern is a glob: allow typical safe glyphs only. _INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$") # Remote host: user@host (optionally with :port-free hostname parts). 
@@ -31,6 +44,15 @@ _GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$") # only (no quotes, shell metacharacters, or spaces) since it lands in a shell # command. A leading ~ is expanded to $HOME at command-build time. _LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$") +_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]") + + +def _git_bash_path(path: str) -> str: + m = re.match(r"^([A-Za-z]):[\\/](.*)$", path) + if not m: + return path + drive, rest = m.groups() + return f"/{drive.lower()}/{rest.replace(chr(92), '/')}" def _validate_repo_id(v: str | None) -> str: @@ -39,6 +61,14 @@ def _validate_repo_id(v: str | None) -> str: return v +def _validate_serve_model_id(v: str | None) -> str: + if not v: + raise HTTPException(400, "repo_id is required") + if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v) or _OLLAMA_MODEL_ID_RE.match(v): + return v + raise HTTPException(400, "Invalid repo_id — must be /, an Ollama name:tag, or a cached local model id") + + def _validate_include(v: str | None) -> str | None: if v is None or v == "": return None @@ -112,7 +142,16 @@ def _local_tooling_path_export(executable: str) -> str: macOS, where the `pip --user` self-heal also misses (`pip` isn't a command, only `pip3`/`python3 -m pip`). Local runs only; meaningless over SSH. """ - bin_dir = os.path.dirname(os.path.abspath(executable)) + # This builds a bash snippet, so an explicit POSIX absolute path should keep + # POSIX semantics even when the app/tests run on Windows. Otherwise + # os.path.abspath("/opt/...") would incorrectly turn it into "D:\\opt\\...". + if executable.startswith("/"): + bin_dir = posixpath.dirname(executable) + elif _WINDOWS_DRIVE_PATH_RE.match(executable): + bin_dir = ntpath.dirname(executable) + else: + bin_dir = os.path.dirname(os.path.abspath(executable)) + bin_dir = _git_bash_path(bin_dir) # Escape for a double-quoted context: $PATH must still expand, but spaces # and shell metacharacters in the path must be preserved literally. 
esc = ( @@ -124,6 +163,365 @@ def _local_tooling_path_export(executable: str) -> str: return f'export PATH="{esc}:$PATH"' +def _pip_install_no_cache(cmd: str) -> str: + """Add ``--no-cache-dir`` to a pip install command. + + Cookbook dependency installs (vLLM, llama-cpp-python, …) build large wheels; + pip's default cache lives under ``$HOME/.cache/pip`` and these builds can fill + a small home filesystem with ``[Errno 28] No space left on device`` mid-build + (issue #1219), leaving the dependency "installed" but unusable (#1459). + Disabling the cache for these one-off installs keeps them off the home disk + (the maintainer's suggested ``PIP_CACHE_DIR=`` workaround, made the default). + Idempotent; leaves non-pip-install commands untouched.""" + if not cmd or "pip install" not in cmd or "--no-cache-dir" in cmd: + return cmd + return cmd.replace("pip install", "pip install --no-cache-dir", 1) + + +def _pip_install_attempt(pip_cmd: str) -> str: + """Wrap a single pip install command so its exit status survives the + fallback chain and its stderr is visible in the tmux log on failure. + + Without this wrapper, `pip … 2>&1 | tail -5` returns ``tail``'s exit + code (0), masking pip's real failure and preventing the next fallback + from running. The generated snippet captures all output to a temp + file, prints the last 5 lines on failure (so the Cookbook log panel + shows useful diagnostics), cleans up, and exits with pip's original + status. 
+ """ + return ( + "bash -c '" + f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; ' + 'tail -5 "$_out"; rm -f "$_out"; exit $_rc' + "'" + ) + + +def _pip_command(python_cmd: str) -> str: + """Return a pip command for either a pip executable or a Python executable.""" + cmd = python_cmd.strip() + if " -m pip" in cmd or cmd in {"pip", "pip3"}: + return python_cmd + if cmd in {"python", "python3", "python.exe"} or cmd.endswith(("/python", "/python3", "\\python.exe")): + return f"{python_cmd} -m pip" + return python_cmd + + +def _pip_break_system_packages_check(pip_cmd: str) -> str: + return f"{pip_cmd} install --help 2>/dev/null | grep -q -- --break-system-packages" + + +def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str: + """Build a bash pip install fallback chain that surfaces errors. + + Try the active interpreter/environment first. ``--user`` is invalid + inside many venvs, so only attempt the ``--user`` fallback when NOT + inside a venv. + + Each attempt is wrapped via :func:`_pip_install_attempt` so pip's real + exit code is preserved (no ``| tail`` masking) and the last 5 lines of + pip output appear in the Cookbook log on failure. + """ + from core.platform_compat import IS_WINDOWS + upgrade_flag = " -U" if upgrade else "" + # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]`` + # contains brackets that bash would treat as a glob, so it must be quoted + # before being embedded in the install command. Plain names (e.g. + # ``huggingface_hub``) are returned unchanged by ``shlex.quote``. + pkg = shlex.quote(package) + # llama-cpp-python source builds are brittle on older distro pip/packaging + # stacks (common on WSL images). Prefer the prebuilt wheel index whenever + # this package is requested so dependency-install tasks are reliable. 
+ if "llama-cpp-python" in package: + pkg += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu" + + pip_cmd = _pip_command(python_cmd) + base = _pip_install_attempt(f"{pip_cmd} install -q{upgrade_flag} {pkg}") + user = _pip_install_attempt(f"{pip_cmd} install --user -q{upgrade_flag} {pkg}") + user_break_system = _pip_install_attempt(f"{pip_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}") + user_fallback = f"( {user} || {{ {_pip_break_system_packages_check(pip_cmd)} && {user_break_system}; }} )" + # Derive the python executable for the venv detection check. + # Must use the same interpreter that pip belongs to; hardcoding + # python3 breaks when pip lives in a venv that only has "python". + if " -m pip" in pip_cmd: + python_exe = pip_cmd.replace(" -m pip", "") + elif pip_cmd.strip() == "pip": + python_exe = "python" + elif pip_cmd.strip() == "pip3": + python_exe = "python3" + else: + python_exe = "python3" + venv_check = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"' + # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv -> `&&` tries + # --user. When IN a venv `! venv_check` fails -> `&&` skips --user and the + # group exits non-zero, propagating the base-install failure instead of + # masking it as success (the `|| { venv_check || … }` shape from #903 + # swallowed the exit code because venv_check's exit-0 became the group's + # result). `--break-system-packages` is only attempted when the active pip + # supports it; older pip versions abort with "no such option" otherwise. + return f"{base} || {{ ! {venv_check} && {user_fallback}; }}" + + +def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str: + """Drop pip user-install flags that are invalid for local venv installs. + + Cookbook dependency installs run through the model-serve task path so users + can watch progress in the same log UI. 
For local POSIX runs, that task + prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is + running from a venv, `python3` resolves to the venv Python and pip rejects + `--user` with "User site-packages are not visible in this virtualenv". + + Keep remote and non-venv installs unchanged: remotes may intentionally use + system Python, and Docker/non-venv installs still need user-site fallback. + """ + if not local or not in_venv: + return cmd + if "pip install" not in (cmd or ""): + return cmd + try: + parts = shlex.split(cmd) + except ValueError: + return cmd + stripped = [ + part + for part in parts + if part not in {"--user", "--break-system-packages"} + ] + return shlex.join(stripped) + + +def _pip_install_command_without_break_system_packages(cmd: str) -> str: + try: + parts = shlex.split(cmd) + except ValueError: + return cmd + stripped = [part for part in parts if part != "--break-system-packages"] + return shlex.join(stripped) + + +def _pip_install_help_check_from_cmd(cmd: str) -> str | None: + try: + parts = shlex.split(cmd) + except ValueError: + return None + try: + install_index = parts.index("install") + except ValueError: + return None + if install_index <= 0: + return None + pip_prefix = parts[:install_index] + return f"{shlex.join(pip_prefix + ['install', '--help'])} 2>/dev/null | grep -q -- --break-system-packages" + + +def _append_pip_install_runner_lines(runner_lines: list[str], cmd: str) -> None: + """Append a pip install command, guarding --break-system-packages support. + + The Dependencies UI may submit ``python3 -m pip install --user + --break-system-packages ...`` for non-venv installs. That flag is useful on + PEP-668-locked distros, but older pip (including Ubuntu 22.04's apt pip in + the NVIDIA CUDA base image) aborts with "no such option". Branch at runner + time so stale browser JS and remote targets are handled by the server too. 
+ """ + if "--break-system-packages" not in (cmd or ""): + runner_lines.append(cmd) + return + help_check = _pip_install_help_check_from_cmd(cmd) + without_break = _pip_install_command_without_break_system_packages(cmd) + if not help_check or without_break == cmd: + runner_lines.append(cmd) + return + runner_lines.append(f"if {help_check}; then") + runner_lines.append(f" {cmd}") + runner_lines.append("else") + runner_lines.append(' echo "[odysseus] pip does not support --break-system-packages; installing without it."') + runner_lines.append(f" {without_break}") + runner_lines.append("fi") + + +def _user_shell_path_bootstrap() -> list[str]: + return [ + 'ODYSSEUS_USER_SHELL="${SHELL:-}"', + 'if [ -n "$ODYSSEUS_USER_SHELL" ] && [ -x "$ODYSSEUS_USER_SHELL" ]; then', + ' ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"', + ' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi', + 'fi', + 'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }', + 'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }', + ] + + +def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache: str | None = None) -> str: + """Build the standalone Python scanner used by /api/model/cached. + Allows for an additional HuggingFace cache path to be scanned (i.e. Windows HF cache for local WSL envs.) 
+ """ + lines = [ + "import json, os, re, shutil, subprocess, urllib.request", + "models = []", + "seen = set()", + "BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')", + "def safe_path(p):", + " try:", + " rp = os.path.realpath(os.path.expanduser(p))", + " return not any(rp == b or rp.startswith(b + os.sep) for b in BLOCKED_ROOTS)", + " except Exception:", + " return False", + "def safe_walk(top):", + " if not safe_path(top): return", + " for root, dirs, fns in os.walk(top, followlinks=False):", + " dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]", + " yield root, dirs, fns", + "def gguf_role(name):", + " n = name.lower()", + " if n.startswith('mmproj') or 'mmproj' in n: return 'projector'", + " return 'model'", + "def gguf_quant(name):", + " m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)", + " return m.group(0).upper() if m else ''", + "def collect_ggufs(base):", + " files = []", + " split_groups = {}", + " if not os.path.isdir(base) or not safe_path(base): return files", + " for root, dirs, fns in safe_walk(base):", + " for fn in sorted(fns):", + " if not fn.lower().endswith('.gguf'): continue", + " fp = os.path.join(root, fn)", + " try: size = os.path.getsize(fp)", + " except Exception: size = 0", + " try: rel = os.path.relpath(fp, base).replace(os.sep, '/')", + " except Exception: rel = fn", + " sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)", + " if sm:", + " prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)", + " key = (root, prefix, total_s)", + " g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})", + " g['size_bytes'] += size", + " if int(part_s) == 1:", + " g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})", + " continue", + " 
files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})", + " files.extend(split_groups.values())", + " files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))", + " return files", + "def scan_hf(cache):", + " if not os.path.isdir(cache): return", + " for d in sorted(os.listdir(cache)):", + " if not d.startswith('models--'): continue", + " rid = d.replace('models--','').replace('--','/')", + " if rid in seen: continue", + " seen.add(rid)", + " blobs = os.path.join(cache, d, 'blobs')", + " sz, nf, ic = 0, 0, False", + " if os.path.isdir(blobs):", + " for f in os.scandir(blobs):", + " if f.is_file(): nf += 1; sz += f.stat().st_size", + " if f.name.endswith('.incomplete'): ic = True", + " snap = os.path.join(cache, d, 'snapshots')", + " # Windows HF cache stores files directly in snapshots/; blobs/ may be empty.", + " # Fallback: scan snapshots for real files when blobs yielded nothing.", + " if sz == 0 and os.path.isdir(snap):", + " for sd in os.listdir(snap):", + " sf = os.path.join(snap, sd)", + " if not os.path.isdir(sf): continue", + " for f in os.scandir(sf):", + " if f.is_file(): nf += 1; sz += f.stat().st_size", + " if f.name.endswith('.incomplete'): ic = True", + " is_diffusion = False; gguf_files = []", + " if os.path.isdir(snap):", + " for sd in os.listdir(snap):", + " sf = os.path.join(snap, sd)", + " if not os.path.isdir(sf): continue", + " if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True", + " for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)", + " models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})", + "def hf_cache_paths():", + " candidates = []", + " def add(p):", + " if not p: return", + " p = os.path.expanduser(p)", + " if p not in candidates: candidates.append(p)", + " 
add(os.environ.get('HUGGINGFACE_HUB_CACHE'))", + " hf_home = os.environ.get('HF_HOME')", + " if hf_home: add(os.path.join(hf_home, 'hub'))", + " add('~/.cache/huggingface/hub')", + " # Docker images mount ./data/huggingface at /app/.cache/huggingface.", + " # When HOME is /root, expanduser() misses that persisted cache.", + " add('/app/.cache/huggingface/hub')", + f" add({add_hf_cache!r})" if add_hf_cache else "", + " return candidates", + "def scan_dir(p):", + " if not os.path.isdir(p) or not safe_path(p): return", + " for d in sorted(os.listdir(p)):", + " if d.startswith('.'): continue", + " if d.startswith('models--'): continue", + " fp = os.path.join(p, d)", + " if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue", + " if d in seen: continue", + " is_model = False; gguf_files = []", + " for root, dirs, fns in safe_walk(fp):", + " for fn in fns:", + " if fn.lower().endswith('.gguf'): is_model = True", + " elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True", + " if is_model: break", + " if not is_model: continue", + " gguf_files = collect_ggufs(fp)", + " seen.add(d)", + " sz, nf = 0, 0", + " for dp, _, fns in safe_walk(fp):", + " for fn in fns:", + " try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))", + " except Exception: pass", + " is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))", + " models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})", + "def parse_size(num, unit):", + " try: n = float(num)", + " except Exception: return 0", + " u = (unit or '').upper()", + " if u.startswith('TB'): return int(n * 1024 ** 4)", + " if u.startswith('GB'): return int(n * 1024 ** 3)", + " if u.startswith('MB'): return int(n * 1024 ** 2)", + " if u.startswith('KB'): return int(n * 1024)", + " return int(n)", + "def scan_ollama():", + " if not 
shutil.which('ollama'): return", + " try:", + " p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)", + " except Exception:", + " return", + " if p.returncode != 0: return", + " for line in (p.stdout or '').splitlines()[1:]:", + " parts = line.split()", + " if len(parts) < 4: continue", + " name = parts[0]", + " if not name or name in seen: continue", + " size_bytes = parse_size(parts[2], parts[3])", + " seen.add(name)", + " models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})", + "def scan_ollama_api():", + " urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']", + " for url in urls:", + " try:", + " with urllib.request.urlopen(url, timeout=2) as r:", + " data = json.loads(r.read().decode('utf-8', 'replace'))", + " except Exception:", + " continue", + " for item in data.get('models', []):", + " name = item.get('name') or item.get('model')", + " if not name or name in seen: continue", + " size_bytes = int(item.get('size') or item.get('size_bytes') or 0)", + " seen.add(name)", + " models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})", + " return", + "for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)", + "scan_ollama()", + "scan_ollama_api()", + ] + for model_dir in model_dirs or []: + lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))") + lines.append("print(json.dumps(models))") + return "\n".join(lines) + "\n" + + def _ps_squote(v: str) -> str: """Escape a value for PowerShell single-quoted string interpolation. 
Belt-and-suspenders on top of _validate_token's regex — if the regex @@ -155,6 +553,38 @@ _SERVE_CMD_ALLOWLIST = { _GGUF_PRELUDE_RE = re.compile( r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*' ) +_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)") +_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$") +_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$") + + +def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]: + """Return the Ollama bind host/port requested by a serve command. + + Plain local `ollama serve` defaults to loopback. Remote callers can pass a + wider default host so the resulting API is reachable by Odysseus. + """ + if not cmd: + return default_host, "11434" + match = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd) + if not match: + return default_host, "11434" + value = match.group(1).strip("'\"") + bind_match = _OLLAMA_BIND_RE.match(value) + if not bind_match: + return "127.0.0.1", "11434" + bracketed_host = bind_match.group(1) + host = bracketed_host or bind_match.group(3) or "127.0.0.1" + port = bind_match.group(2) or bind_match.group(4) or "11434" + if not _OLLAMA_BIND_HOST_RE.match(host): + return "127.0.0.1", "11434" + try: + port_num = int(port, 10) + except ValueError: + return "127.0.0.1", "11434" + if port_num < 1 or port_num > 65535: + return "127.0.0.1", "11434" + return f"[{host}]" if bracketed_host else host, port def _check_serve_binary(seg: str) -> None: @@ -198,6 +628,7 @@ def _validate_serve_cmd(v: str | None) -> str | None: # Backticks and raw newlines are never legitimate here. if any(c in v for c in ("`", "\n", "\r")): raise HTTPException(400, "Invalid characters in cmd") + # Known GGUF launcher prelude → validate the serve invocation(s) it guards. 
m = _GGUF_PRELUDE_RE.match(v) if m: @@ -206,14 +637,154 @@ def _validate_serve_cmd(v: str | None) -> str | None: for part in rest.split("||"): _check_serve_binary(part.strip()) return v + # Otherwise: a single invocation — no shell metacharacters allowed. + # Temporarily replace safe $(printf %s ...) expressions with a placeholder + # to avoid triggering the metacharacter/command-injection checks. + cleaned_v = v + printf_matches = list(re.finditer(r"\$\(\s*printf\s+%s\s+([^\n()]*?)\)", v)) + for match in printf_matches: + inner = match.group(1) + if not any(c in inner for c in (";", "&&", "||", "$(", "`")): + cleaned_v = cleaned_v.replace(match.group(0), "/placeholder/safe/path.gguf") + # (`$(` was the original intent; bare `$` is fine for shell-safe paths.) - if any(c in v for c in (";", "&&", "||", "$(")): + if any(c in cleaned_v for c in (";", "&&", "||", "$(")): raise HTTPException(400, "Invalid characters in cmd") _check_serve_binary(v) return v +def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None: + """Append serve-runner lines that surface preflight failures before exit.""" + runner_lines.append('if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then') + runner_lines.append(' echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="') + if keep_shell_open: + # Decouple the post-crash interactive shell from the persistent log + # file. fds 3/4 were saved BEFORE the tee redirect at the top of + # the runner; restoring them here means the neofetch banner the + # user's .zshrc prints lands on the tmux pane only, not in the + # log file the agent's tail_serve_output reads. 
+ runner_lines.append(' exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true') + runner_lines.append(' sleep 0.2 # let tee child flush + exit') + runner_lines.append(' exec "${SHELL:-/bin/bash}"') + else: + runner_lines.append(' exit "$ODYSSEUS_PREFLIGHT_EXIT"') + runner_lines.append('fi') + + +def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None: + """Append Linux vLLM readiness lines that identify the runtime being used.""" + # Keep the user install bin visible for Odysseus-managed `pip install --user` + # installs, but then report the actual CLI path so external runtimes are clear. + runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') + runner_lines.append('ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"') + runner_lines.append('if [ -z "$ODYSSEUS_VLLM_BIN" ]; then') + runner_lines.append(' echo "ERROR: vLLM is not installed."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('else') + runner_lines.append(' echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"') + runner_lines.append(' ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"') + runner_lines.append(' if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi') + runner_lines.append('fi') + +def _append_serve_exit_code_lines( + runner_lines: list[str], + *, + keep_shell_open: bool, + is_pip_install: bool = False, +) -> None: + """Append serve-runner lines that preserve and report the command exit code.""" + runner_lines.append('ODYSSEUS_CMD_EXIT=$?') + if is_pip_install: + runner_lines.append('if [ $ODYSSEUS_CMD_EXIT -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; fi') + if keep_shell_open: + runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="') + # See preflight branch above for the rationale on restoring fds 3/4. 
+ runner_lines.append('exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true') + runner_lines.append('sleep 0.2 # let tee child flush + exit') + runner_lines.append('exec "${SHELL:-/bin/bash}"') + else: + runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="') + runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"') + + +def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None: + """Append Linux llama.cpp build lines that prefer ROCm/HIP when available. + + Cookbook already detects AMD GPUs elsewhere, but the llama.cpp bootstrap used + to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and + fail with "CUDA Toolkit not found" instead of building with HIP. + """ + # Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH + # so cmake's CUDA configure can find it. We keep this after the ROCm/HIP + # check — a machine with both stacks should honor the native HIP toolchain on + # AMD hosts instead of accidentally preferring a stray nvcc wheel. + runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do') + runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break') + runner_lines.append(' done') + # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA + # or HIP attempt) doesn't cause the next configure to reuse stale settings. 
+ runner_lines.append(' cd ~/llama.cpp && rm -rf build') + runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then') + runner_lines.append(' if command -v hipconfig &>/dev/null; then') + runner_lines.append(' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"') + runner_lines.append(' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"') + runner_lines.append(' fi') + runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' elif command -v nvcc &>/dev/null; then') + # nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete + # tooling can expose nvcc without shipping libcudart, causing cmake to fail + # mid-build with "CUDA runtime library not found". Check cudart explicitly + # via a small helper so the guard stays readable. 
+ runner_lines.append(' _odysseus_has_cudart() {') + runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0') + runner_lines.append(' local _cuh="${CUDA_HOME:-/usr/local/cuda}"') + runner_lines.append(' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' return 1') + runner_lines.append(' }') + runner_lines.append(' if _odysseus_has_cudart; then') + runner_lines.append(' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' else') + runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."') + runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') + runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. 
cuda-runtime package) and visible via ldconfig or CUDA_HOME."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' fi') + runner_lines.append(' else') + runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."') + runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') + runner_lines.append(' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' fi') + + +def _llama_cpp_rebuild_cmd() -> str: + """Shell command that clears the Cookbook-managed llama.cpp build. + + Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build`` + directory so the next llama.cpp serve recompiles from source, picking up a + CUDA or HIP toolchain if one is now available. The serve bootstrap only + builds when ``llama-server`` is missing from PATH, so without this an + existing CPU-only build is reused forever. It deliberately installs and + downloads nothing; the rebuild itself happens on the next serve. + """ + return ( + 'mkdir -p "$HOME/bin" && ' + 'rm -f "$HOME/bin/llama-server" && ' + 'rm -rf "$HOME/llama.cpp/build" && ' + 'echo "[odysseus] Cleared the cached llama.cpp build. ' + 'Re-launch the serve task to rebuild llama-server from source ' + '(CUDA or HIP will be used if a toolchain is now available)."' + ) + + class ModelDownloadRequest(BaseModel): repo_id: str include: str | None = None # glob pattern e.g. 
"*Q4_K_M*" @@ -276,6 +847,8 @@ def _parse_serve_phase(snapshot: str, task_type: str = "serve") -> dict: } if "Application startup complete" in flat: return {"phase": "ready", "status": "ready"} + if re.search(r'Ollama API ready on port\s+\d+', flat, re.I): + return {"phase": "ready", "status": "ready"} # HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up and serving if re.search(r'(?:GET|POST)\s+/[^\s]*\s+HTTP/[\d.]+"\s*\d{3}', flat): return {"phase": "idle", "status": "ready"} @@ -360,3 +933,172 @@ def _ssh_ps(host, script_path, port=None): # Windows session dir — stored in user's temp on the remote WIN_SESSION_DIR = "$env:TEMP\\\\odysseus-sessions" + + +def _diagnose_serve_output(text: str) -> dict | None: + """Server-side mirror of the Cookbook UI's common serve diagnoses. + + The browser uses cookbook-diagnosis.js for clickable fixes. This gives + the agent/tool path the same structured signal so it can retry with an + adjusted command instead of guessing from raw tmux output. 
+ """ + if not text: + return None + tail = text[-6000:] + patterns = [ + ( + r"No available memory for the cache blocks|Available KV cache memory:.*-", + "No GPU memory left for KV cache after loading model.", + [ + {"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"}, + {"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"}, + ], + ), + ( + r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization", + "GPU ran out of memory during startup or warmup.", + [ + {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, + {"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"}, + {"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"}, + ], + ), + ( + r"not divisib|must be divisible|attention heads.*divisible", + "Tensor parallel size is incompatible with the model.", + [ + {"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"}, + {"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"}, + ], + ), + ( + r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context", + "Context length is too large for available GPU memory.", + [ + {"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"}, + {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, + ], + ), + ( + r"enable-auto-tool-choice requires --tool-call-parser", + "Auto tool choice requires an explicit tool call parser.", + [{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}], + ), + ( + r"Please pass.*trust.remote.code=True|contains custom code which must be 
executed to correctly load|does not recognize this architecture|model type.*but Transformers does not", + "Model requires custom code or newer model support.", + [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], + ), + ( + r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2", + "vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.", + [ + { + "label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint", + "op": "manual", + } + ], + ), + ( + r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", + "vLLM/Transformers kernel package mismatch.", + [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}], + ), + ( + r"Address already in use|bind.*address.*in use", + "Port is already in use.", + [{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}], + ), + ( + r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid", + "No GPUs are visible to the serve process.", + [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}], + ), + ( + r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available", + "vLLM could not find a supported GPU (CUDA or ROCm). 
" + "This machine may have integrated or unsupported graphics only.", + [ + {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + ], + ), + ( + r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed", + "vLLM is not installed or not in PATH on this server.", + [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}], + ), + ( + r"sglang.*command not found|No module named sglang|SGLang is not installed", + "SGLang is not installed or not in PATH on this server.", + [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}], + ), + ( + r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found", + "llama.cpp / llama-cpp-python dependencies are missing.", + [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}], + ), + ( + r"No GGUF found on this host|no \.gguf file|No GGUF file found", + "No GGUF file found for this model on this host. 
The llama.cpp backend needs a .gguf file.", + [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}], + ), + ( + r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers", + "Diffusion serving requires PyTorch and diffusers.", + [{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}], + ), + ( + r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review", + "Model access is gated or unauthorized.", + [{"label": "set HF token and request model access on HuggingFace", "op": "manual"}], + ), + ] + for pattern, message, suggestions in patterns: + if re.search(pattern, tail, re.I): + return {"message": message, "suggestions": suggestions} + if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search( + r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I + ): + return { + "message": "Python traceback detected during serve startup.", + "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}], + } + return None + + +async def run_ssh_command_async( + remote: str, + ssh_port: str | None, + remote_cmd: str, + *, + timeout: float, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, + stdin_data: bytes | None = None, +) -> tuple[int, bytes, bytes]: + """Run an ssh command with centralized timeout and stderr/stdout capture. + Async version of core.platform_compat.run_ssh_command_sync. 
+ """ + import asyncio + proc = await asyncio.create_subprocess_exec( + *_ssh_exec_argv( + remote, + ssh_port, + remote_cmd=remote_cmd, + connect_timeout=connect_timeout, + strict_host_key_checking=strict_host_key_checking, + ), + stdin=asyncio.subprocess.PIPE if stdin_data is not None else None, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + try: + stdout, stderr = await asyncio.wait_for( + proc.communicate(input=stdin_data), timeout=timeout + ) + except asyncio.TimeoutError: + proc.kill() + await proc.communicate() + raise + return proc.returncode or 0, stdout, stderr diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index b14a1479b..7a1ee85c6 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -15,28 +15,40 @@ from pathlib import Path from fastapi import APIRouter, HTTPException, Request, Depends from src.auth_helpers import require_user +from src.constants import COOKBOOK_STATE_FILE from pydantic import BaseModel from core.middleware import require_admin from core.platform_compat import ( IS_WINDOWS, + SSH_PATH_OVERRIDE, + NVIDIA_PATH_CANDIDATES, detached_popen_kwargs, find_bash, + git_bash_path, kill_process_tree, pid_alive, safe_chmod, which_tool, + translate_path, + get_wsl_windows_user_profile, ) from routes.shell_routes import TMUX_LOG_DIR +from src.constants import COOKBOOK_STATE_FILE logger = logging.getLogger(__name__) from routes.cookbook_helpers import ( _SSH_PORT_RE, _REMOTE_HOST_RE, _SESSION_ID_RE, - _validate_repo_id, _validate_include, _validate_remote_host, _validate_token, + _validate_repo_id, _validate_serve_model_id, _validate_include, _validate_remote_host, _validate_token, _validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path, _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase, - _safe_env_prefix, _local_tooling_path_export, + _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines, + _append_serve_exit_code_lines, 
_append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script, + _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain, + _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _append_pip_install_runner_lines, + _diagnose_serve_output, run_ssh_command_async, ModelDownloadRequest, ServeRequest, ) @@ -51,7 +63,7 @@ _HF_TOKEN_STATUS_SNIPPET = ( def setup_cookbook_routes() -> APIRouter: router = APIRouter(tags=["cookbook"]) - _cookbook_state_path = Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json" + _cookbook_state_path = Path(COOKBOOK_STATE_FILE) def _mask_secret(value: str) -> str: if not value: @@ -78,113 +90,6 @@ def setup_cookbook_routes() -> APIRouter: task["payload"].pop("hf_token", None) return state - def _diagnose_serve_output(text: str) -> dict | None: - """Server-side mirror of the Cookbook UI's common serve diagnoses. - - The browser uses cookbook-diagnosis.js for clickable fixes. This gives - the agent/tool path the same structured signal so it can retry with an - adjusted command instead of guessing from raw tmux output. 
- """ - if not text: - return None - tail = text[-6000:] - patterns = [ - ( - r"No available memory for the cache blocks|Available KV cache memory:.*-", - "No GPU memory left for KV cache after loading model.", - [ - {"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"}, - {"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"}, - ], - ), - ( - r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization", - "GPU ran out of memory during startup or warmup.", - [ - {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, - {"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"}, - {"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"}, - ], - ), - ( - r"not divisib|must be divisible|attention heads.*divisible", - "Tensor parallel size is incompatible with the model.", - [ - {"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"}, - {"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"}, - ], - ), - ( - r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context", - "Context length is too large for available GPU memory.", - [ - {"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"}, - {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, - ], - ), - ( - r"enable-auto-tool-choice requires --tool-call-parser", - "Auto tool choice requires an explicit tool call parser.", - [{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}], - ), - ( - r"Please pass.*trust.remote.code=True|contains custom code which must be 
executed to correctly load|does not recognize this architecture|model type.*but Transformers does not", - "Model requires custom code or newer model support.", - [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], - ), - ( - r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", - "vLLM/Transformers kernel package mismatch.", - [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}], - ), - ( - r"Address already in use|bind.*address.*in use", - "Port is already in use.", - [{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}], - ), - ( - r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid", - "No GPUs are visible to the serve process.", - [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}], - ), - ( - r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed", - "vLLM is not installed or not in PATH on this server.", - [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}], - ), - ( - r"sglang.*command not found|No module named sglang|SGLang is not installed", - "SGLang is not installed or not in PATH on this server.", - [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}], - ), - ( - r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found", - "llama.cpp / llama-cpp-python dependencies are missing.", - [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}], - ), - ( - r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers", - "Diffusion serving requires 
PyTorch and diffusers.", - [{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}], - ), - ( - r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review", - "Model access is gated or unauthorized.", - [{"label": "set HF token and request model access on HuggingFace", "op": "manual"}], - ), - ] - for pattern, message, suggestions in patterns: - if re.search(pattern, tail, re.I): - return {"message": message, "suggestions": suggestions} - if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search( - r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I - ): - return { - "message": "Python traceback detected during serve startup.", - "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}], - } - return None - def _state_for_client(state): """Return cookbook state without raw secrets for browser clients.""" _strip_task_secrets(state) @@ -278,14 +183,6 @@ def setup_cookbook_routes() -> APIRouter: safe_chmod(key_path.with_suffix(".pub"), 0o644) return {"ok": True, "public_key": _read_cookbook_public_key()} - def _user_shell_path_bootstrap() -> list[str]: - return [ - 'ODYSSEUS_USER_SHELL="${SHELL:-}"', - 'if [ -n "$ODYSSEUS_USER_SHELL" ] && [ -x "$ODYSSEUS_USER_SHELL" ]; then', - ' ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"', - ' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi', - 'fi', - ] def _needs_binary(cmd: str, binary: str) -> bool: return bool(re.search(rf"(^|[\s;&|()]){re.escape(binary)}($|[\s;&|()])", cmd or "")) @@ -347,8 +244,8 @@ def setup_cookbook_routes() -> APIRouter: # POSIX form + shell-quoting so drive paths / spaces survive. 
inner = TMUX_LOG_DIR / f"{session_id}_run.sh" inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8") - lp = shlex.quote(log_path.as_posix()) - ip = shlex.quote(inner.as_posix()) + lp = shlex.quote(git_bash_path(log_path)) + ip = shlex.quote(git_bash_path(inner)) script_path = TMUX_LOG_DIR / f"{session_id}.sh" script_path.write_text( f"bash {ip} > {lp} 2>&1\n", @@ -367,11 +264,15 @@ def setup_cookbook_routes() -> APIRouter: encoding="utf-8", ) argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)] + env = os.environ.copy() + env["PYTHONUTF8"] = "1" + env["PYTHONIOENCODING"] = "utf-8" proc = subprocess.Popen( argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, + env=env, **detached_popen_kwargs(), ) pid_path.write_text(str(proc.pid), encoding="utf-8") @@ -423,20 +324,20 @@ def setup_cookbook_routes() -> APIRouter: lines.append('export PATH="$HOME/.local/bin:$PATH"') # When Odysseus runs from a venv (e.g. native macOS install), put its bin # on PATH so the tmux shell finds the bundled `hf`/`python3` without an - # activated venv. Local bash runs only — meaningless over SSH/Windows. - if not req.remote_host and req.platform != "windows": + # activated venv. Local bash runs only — meaningless over SSH. + if not req.remote_host: lines.append(_local_tooling_path_export(sys.executable)) # Best-effort install hf CLI (always). hf_transfer (Rust parallel downloader) # is fast but flaky on large files — it tends to crash near the end at high # throughput. Retries set disable_hf_transfer to fall back to the plain, # slower-but-reliable downloader (resumes cleanly from the .incomplete files). # Use `python3 -m pip` not `pip` — macOS has no bare `pip` command. 
- lines.append("command -v hf >/dev/null 2>&1 || python3 -m pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || python3 -m pip install -q -U huggingface_hub 2>/dev/null") + lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', upgrade=True)}") if req.disable_hf_transfer: lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0") lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4") else: - lines.append("python3 -c 'import hf_transfer' 2>/dev/null || python3 -m pip install --user --break-system-packages -q hf_transfer 2>/dev/null || python3 -m pip install -q hf_transfer 2>/dev/null") + lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer')}") lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") @@ -460,6 +361,8 @@ def setup_cookbook_routes() -> APIRouter: ps_lines = [] ps_lines.append('$sessionDir = "$env:TEMP\\odysseus-sessions"') ps_lines.append('New-Item -ItemType Directory -Force -Path $sessionDir | Out-Null') + ps_lines.append('$env:PYTHONIOENCODING = "utf-8"') + ps_lines.append('$env:PYTHONUTF8 = "1"') if req.hf_token: ps_lines.append(f"$env:HF_TOKEN = '{_ps_squote(req.hf_token)}'") if req.env_prefix: @@ -530,12 +433,18 @@ def setup_cookbook_routes() -> APIRouter: ) # Ensure pip-user scripts (e.g. hf CLI installed via --user) are on PATH runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - # Install hf CLI + hf_transfer best-effort so future runs get the fast path. - # Use --break-system-packages on PEP-668 systems (Arch, newer Debian) so it doesn't bail. 
- runner_lines.append("command -v hf >/dev/null 2>&1 || pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || pip install -q -U huggingface_hub 2>/dev/null") - runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null || pip install --user --break-system-packages -q hf_transfer 2>/dev/null || pip install -q hf_transfer 2>/dev/null") - runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") - runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") + # Install hf CLI + optional hf_transfer best-effort. Retries disable + # hf_transfer because the Rust parallel path is fast but has been + # flaky near the end of very large multi-file downloads. + # The helper tries active pip first, then guarded user-site fallbacks. + runner_lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', python_cmd='pip', upgrade=True)}") + if req.disable_hf_transfer: + runner_lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0") + runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4") + else: + runner_lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer', python_cmd='pip')}") + runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") + runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8") # Surface whether the HF token actually reached THIS server, so a gated # download's "not authorized" failure can be told apart from a missing # token (the token is masked — we only print applied / not-set). 
@@ -546,15 +455,19 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(f' {hf_cmd} < /dev/null') runner_lines.append('elif python3 -c "import huggingface_hub" 2>/dev/null; then') runner_lines.append(' echo "hf CLI not found, using Python huggingface_hub..."') - runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"') + runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"') runner_lines.append('else') runner_lines.append(' echo "Installing huggingface-hub and dependencies..."') runner_lines.append(' pip install --no-deps -q huggingface-hub 2>/dev/null') - runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null') - runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") - runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"') + if req.disable_hf_transfer: + runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests 2>/dev/null') + runner_lines.append(' export HF_HUB_ENABLE_HF_TRANSFER=0') + else: + runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null') + runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1") + runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"') runner_lines.append('fi') - runner_lines.append('if [ $? 
-eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + runner_lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') runner_lines.append(f"rm -f {remote_runner}") runner_lines.append('exec "${SHELL:-/bin/bash}"') runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" @@ -585,11 +498,11 @@ def setup_cookbook_routes() -> APIRouter: # Detached path: no controlling TTY, so skip `< /dev/null` # (handled by Popen stdin=DEVNULL) and don't keep a shell open. lines.append(hf_cmd) - lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') else: # < /dev/null suppresses interactive "update available? [Y/n]" prompt lines.append(f"{hf_cmd} < /dev/null") - lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi') lines.append(f"rm -f '{wrapper_script}'") lines.append('exec "${SHELL:-/bin/bash}"') wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -646,107 +559,50 @@ def setup_cookbook_routes() -> APIRouter: raise HTTPException(400, "Invalid ssh_port") TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True) - paths_code = "import json, os\n" - paths_code += "models = []\n" - paths_code += "seen = set()\n" - paths_code += "BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')\n" - paths_code += "def safe_path(p):\n" - paths_code += " try:\n" - paths_code += " rp = os.path.realpath(os.path.expanduser(p))\n" - paths_code += " return not any(rp == b or rp.startswith(b + os.sep) for b in BLOCKED_ROOTS)\n" - paths_code += " except Exception:\n" - paths_code += " return 
False\n" - paths_code += "def safe_walk(top):\n" - paths_code += " if not safe_path(top): return\n" - paths_code += " for root, dirs, fns in os.walk(top, followlinks=False):\n" - paths_code += " dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]\n" - paths_code += " yield root, dirs, fns\n" - # Scan HF cache format (models-- directories with blobs/) - paths_code += "def scan_hf(cache):\n" - paths_code += " if not os.path.isdir(cache): return\n" - paths_code += " for d in sorted(os.listdir(cache)):\n" - paths_code += " if not d.startswith('models--'): continue\n" - paths_code += " rid = d.replace('models--','').replace('--','/')\n" - paths_code += " if rid in seen: continue\n" - paths_code += " seen.add(rid)\n" - paths_code += " blobs = os.path.join(cache, d, 'blobs')\n" - paths_code += " sz, nf, ic = 0, 0, False\n" - paths_code += " if os.path.isdir(blobs):\n" - paths_code += " for f in os.scandir(blobs):\n" - paths_code += " if f.is_file(): nf += 1; sz += f.stat().st_size\n" - paths_code += " if f.name.endswith('.incomplete'): ic = True\n" - paths_code += " # Check if it's an LLM (has config.json with model_type) vs diffusion (has model_index.json)\n" - paths_code += " snap = os.path.join(cache, d, 'snapshots')\n" - paths_code += " is_diffusion = False; is_gguf = False\n" - paths_code += " if os.path.isdir(snap):\n" - paths_code += " for sd in os.listdir(snap):\n" - paths_code += " sf = os.path.join(snap, sd)\n" - paths_code += " if not os.path.isdir(sf): continue\n" - paths_code += " if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True\n" - paths_code += " try:\n" - paths_code += " if any(x.endswith('.gguf') for x in os.listdir(sf)): is_gguf = True\n" - paths_code += " except Exception: pass\n" - paths_code += " models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':is_gguf})\n" - # Scan plain directory 
(each subdirectory = a model if it has model files) - paths_code += "def scan_dir(p):\n" - paths_code += " if not os.path.isdir(p) or not safe_path(p): return\n" - paths_code += " for d in sorted(os.listdir(p)):\n" - paths_code += " if d.startswith('.'): continue\n" - paths_code += " fp = os.path.join(p, d)\n" - paths_code += " if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue\n" - paths_code += " if d in seen: continue\n" - paths_code += " # Check if it looks like a model (has config.json, safetensors, bin, or gguf)\n" - paths_code += " is_model = False; is_gguf = False\n" - paths_code += " for root, dirs, fns in safe_walk(fp):\n" - paths_code += " for fn in fns:\n" - paths_code += " if fn.endswith('.gguf'): is_gguf = True; is_model = True\n" - paths_code += " elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True\n" - paths_code += " if is_model: break\n" - paths_code += " if not is_model: continue\n" - paths_code += " seen.add(d)\n" - paths_code += " sz, nf = 0, 0\n" - paths_code += " for dp, _, fns in safe_walk(fp):\n" - paths_code += " for fn in fns:\n" - paths_code += " try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))\n" - paths_code += " except Exception: pass\n" - paths_code += " is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))\n" - paths_code += " models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})\n" - # Always scan HF cache - paths_code += "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))\n" - # Also scan custom model dirs (comma-separated) if specified + model_dirs = [] if model_dir: for d in model_dir.split(','): d = d.strip() - if d and d != '~/.cache/huggingface/hub': - # repr() encodes the dir as a properly-escaped Python string - # literal. The old f"...'{d}'..." 
broke out of the quotes on - # any `'` in the value, injecting arbitrary Python that then - # ran locally or over ssh. - paths_code += f"scan_dir(os.path.expanduser({d!r}))\n" - paths_code += "print(json.dumps(models))\n" + if d: + translated_d = translate_path(d) if not host else d + model_dirs.append(translated_d) + win_hf_hub = None + if not host: + win_profile = get_wsl_windows_user_profile() + win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None + + paths_code = _cached_model_scan_script(model_dirs, win_hf_hub) scan_py = TMUX_LOG_DIR / "scan_cache.py" scan_py.write_text(paths_code, encoding="utf-8") + scan_payload = scan_py.read_bytes() if host: - _pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" if platform == "windows": - # Windows: use 'python' and pipe via stdin with double-quote wrapping - cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\'' + remote_cmd = "python -" else: - cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'" - proc = await asyncio.create_subprocess_shell( - cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(Path.home()), + # POSIX: use 'python3' if available, fall back to 'python'; throw if neither is found. + remote_cmd = ( + "if command -v python3 >/dev/null 2>&1; then python3 -; " + "elif command -v python >/dev/null 2>&1; then python -; " + "else echo \"python3/python not found\" >&2; exit 127; fi" + ) + rc, stdout_b, stderr_b = await run_ssh_command_async( + host, + ssh_port, + remote_cmd, + timeout=60, + stdin_data=scan_payload, ) else: - # LOCAL scan: run the interpreter directly. `python3` isn't a thing on - # Windows (it's `python`/`py`), and shell single-quoting of the path - # doesn't survive cmd.exe — so resolve the interpreter and exec it - # with the script path as an argv element (no shell quoting needed). 
- local_py = ( + # LOCAL scan: use sys.executable (the venv Python Odysseus is already + # running under) — it's guaranteed real Python on all platforms. + # Falling back to which_tool on Windows risks hitting the Microsoft + # Store stub alias for "python3"/"python", which prints + # "Python was not found; run without arguments to install from the + # Microsoft Store" and exits 9009, producing empty stdout and a + # JSON parse error. sys.executable bypasses PATH entirely. + local_py = sys.executable or ( which_tool("python3") or which_tool("python") or which_tool("py") or "python" ) @@ -756,7 +612,7 @@ def setup_cookbook_routes() -> APIRouter: stderr=asyncio.subprocess.PIPE, cwd=str(Path.home()), ) - stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60) + stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60) models = [] try: @@ -778,6 +634,14 @@ def setup_cookbook_routes() -> APIRouter: } if m.get("is_local_dir"): entry["is_local_dir"] = True + if m.get("is_gguf"): + entry["is_gguf"] = True + if m.get("backend"): + entry["backend"] = m.get("backend") + if m.get("is_ollama"): + entry["is_ollama"] = True + if isinstance(m.get("gguf_files"), list): + entry["gguf_files"] = m["gguf_files"] models.append(entry) except Exception as e: logger.warning(f"Failed to parse cached models: {e}") @@ -839,14 +703,159 @@ def setup_cookbook_routes() -> APIRouter: finally: db.close() + def _pick_free_port_for_ollama( + remote: str | None, ssh_port: str | None, start_port: int, max_offset: int + ) -> int | None: + """Return the first free port in [start_port, start_port+max_offset] on + the target host. Used to pick a real bind for `ollama serve` so we + don't reattach to an external systemd ollama (or other listener) the + Cookbook Stop button can't kill.""" + import socket + if remote: + # Probe over SSH. Bash's /dev/tcp gives a portable "is anything + # listening" check without requiring ss/netstat/nmap. 
+ ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"] + if ssh_port and str(ssh_port) != "22": + if not _SSH_PORT_RE.match(str(ssh_port)): + return None + ssh_base.extend(["-p", str(ssh_port)]) + host_arg = remote + if not _REMOTE_HOST_RE.match(host_arg): + return None + probe_ports = " ".join(str(start_port + i) for i in range(max_offset + 1)) + script = ( + f"for p in {probe_ports}; do " + "if ! (exec 3<>/dev/tcp/127.0.0.1/$p) 2>/dev/null; then " + "echo $p; exit 0; fi; exec 3<&-; exec 3>&-; done; exit 1" + ) + try: + import subprocess + r = subprocess.run( + ssh_base + [host_arg, script], + capture_output=True, text=True, timeout=8, + ) + if r.returncode == 0: + out = (r.stdout or "").strip().splitlines() + if out and out[0].isdigit(): + return int(out[0]) + except Exception: + return None + return None + # Local: just try to connect. + for off in range(max_offset + 1): + p = start_port + off + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(0.25) + try: + s.connect(("127.0.0.1", p)) + except (ConnectionRefusedError, socket.timeout, OSError): + return p + return None + + def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None: + """Register a freshly-served LLM as a model endpoint so it appears in the + model picker without a manual /setup step — the text-model sibling of + _auto_register_image_endpoint. + + Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's + llama-server, vLLM, SGLang, or Ollama) on a known port. We point an + endpoint at that server's /v1; the picker auto-discovers the model id by + probing /v1/models and dims the endpoint until the server is reachable, + so registering immediately (before the server finishes loading) is safe. + """ + import re + from core.database import SessionLocal, ModelEndpoint + + # Port: ordered fallbacks so we match whatever the user actually + # asked for, not a hardcoded default: + # 1. 
explicit `--port N` (vllm / sglang / llama-server) + # 2. `OLLAMA_HOST=host:port` (the way Ollama specifies its bind) + # 3. fallback by backend (11434 ollama / 8080 llama.cpp) + # Previously the OLLAMA_HOST form was silently ignored and we + # registered every Ollama endpoint at 11434 — even if the user + # set OLLAMA_HOST=0.0.0.0:11435 to avoid colliding with an + # existing systemd Ollama, the registered endpoint pointed at + # the OLD port and showed as offline. + port_match = re.search(r'--port\s+(\d+)', req.cmd) + ollama_host_match = re.search(r'OLLAMA_HOST=[^\s]*?:(\d+)', req.cmd) + if port_match: + port = int(port_match.group(1)) + elif ollama_host_match: + port = int(ollama_host_match.group(1)) + elif "ollama" in req.cmd: + port = 11434 + else: + port = 8080 # llama.cpp's llama-server default — the Apple Silicon path + + # Determine host (mirrors the image path: SSH alias for remote serves). + # For local serves while Odysseus runs inside Docker, "localhost" + # resolves to the container itself — useless. Use host.docker.internal + # which compose maps to the actual host, matching what /setup adds + # for Ollama by hand. + if remote: + host = remote.split("@")[-1] if "@" in remote else remote + else: + from routes.model_routes import _docker_host_gateway_reachable + host = "host.docker.internal" if _docker_host_gateway_reachable() else "localhost" + + base_url = f"http://{host}:{port}/v1" + + short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id + display_name = short_name or "Local model" + + # If the serve command opts models into OpenAI tool-calling, record it so + # agent_loop trusts emitted tool_calls instead of the name heuristic. + supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None + + db = SessionLocal() + try: + # Reuse an endpoint already pointed at this URL instead of duplicating. 
+ existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first() + if existing: + existing.is_enabled = True + existing.model_type = "llm" + existing.name = display_name + if supports_tools is not None: + existing.supports_tools = supports_tools + # Wipe stale model lists so the picker re-probes and discovers + # the newly-served model instead of showing the old one. + existing.cached_models = None + existing.hidden_models = None + db.commit() + logger.info(f"Updated existing local model endpoint: {base_url}") + return existing.id + + ep_id = f"local-{uuid.uuid4().hex[:8]}" + ep = ModelEndpoint( + id=ep_id, + name=display_name, + base_url=base_url, + api_key=None, + is_enabled=True, + model_type="llm", + supports_tools=supports_tools, + ) + db.add(ep) + db.commit() + logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}") + return ep_id + except Exception as e: + logger.error(f"Failed to auto-register local model endpoint: {e}") + db.rollback() + return None + finally: + db.close() + @router.post("/api/model/serve") async def model_serve(request: Request, req: ServeRequest): """Launch a model server in a tmux session (or PowerShell background process on Windows). `repo_id` is dual-purpose: a HuggingFace repo (`/`) for - model-serve commands, OR a bare pip package name when the cmd is a - `python -m pip install …`. We only enforce the strict HF format on - the model paths. + model-serve commands, a cached local-model id (the folder name reported + by `/api/model/cached`) for models scanned from a custom model dir, OR a + bare pip package name when the cmd is a `python -m pip install …`. We + keep strict validation, but serving local cached models must not require + a fake org/name wrapper. """ require_admin(request) # Defence-in-depth: reject values that could break out of shell contexts. 
@@ -862,8 +871,33 @@ def setup_cookbook_routes() -> APIRouter: # many downstream `"engine" in req.cmd` membership checks can't hit # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400). req.cmd = _validate_serve_cmd(req.cmd) or "" + req.cmd = _venv_safe_local_pip_install_cmd( + req.cmd, + local=not bool(req.remote_host), + in_venv=sys.prefix != sys.base_prefix, + ) is_pip_install = bool(req.cmd and "pip install" in req.cmd) + remote = req.remote_host + is_windows = req.platform == "windows" + local_windows = IS_WINDOWS and not remote + if is_windows or local_windows: + if req.cmd.startswith("python3 "): + req.cmd = "python " + req.cmd[len("python3 "):] + if is_pip_install and ("llama-cpp-python" in req.cmd or "llama_cpp" in req.cmd) and (is_windows or local_windows): + if "--extra-index-url" not in req.cmd: + req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu" + if is_pip_install: + # Keep big dependency wheel builds (vLLM, …) off the home filesystem's + # pip cache so they don't fail mid-build with "No space left" (#1219) + # and leave the dep installed-but-unusable (#1459). + req.cmd = _pip_install_no_cache(req.cmd) + # Accept common aliases and enforce server extras for llama-cpp so + # `python -m llama_cpp.server` has all runtime dependencies. + req.cmd = re.sub(r"(?=!~,` for version specifiers. # v2 review HIGH-14: tightened from the previous regex which @@ -875,11 +909,24 @@ def setup_cookbook_routes() -> APIRouter: ): raise HTTPException(400, "Invalid pip package name") else: - _validate_repo_id(req.repo_id) + _validate_serve_model_id(req.repo_id) TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True) session_id = f"serve-{uuid.uuid4().hex[:8]}" remote = req.remote_host is_windows = req.platform == "windows" + + # Ollama: if the user didn't pin a port, resolve the actual port we'll + # bind to here (before runner construction) by probing the target host. 
+ # Otherwise the runner script picks one at runtime and `_auto_register` + # below still registers the stale 11434 default — which on a host with + # a systemd ollama lands on the wrong (unreachable-from-docker) service. + if "ollama" in req.cmd and "OLLAMA_HOST=" not in req.cmd: + _ollama_bind_host = "0.0.0.0" if remote else "127.0.0.1" + _ollama_chosen_port = _pick_free_port_for_ollama( + remote, req.ssh_port, start_port=11434, max_offset=10, + ) + if _ollama_chosen_port: + req.cmd = f"OLLAMA_HOST={_ollama_bind_host}:{_ollama_chosen_port} {req.cmd}" # LOCAL execution on a native-Windows host never uses tmux (detached # process path below), regardless of the UI-supplied platform. local_windows = IS_WINDOWS and not remote @@ -903,6 +950,8 @@ def setup_cookbook_routes() -> APIRouter: ps_lines = [] ps_lines.append('$sessionDir = "$env:TEMP\\odysseus-sessions"') ps_lines.append('New-Item -ItemType Directory -Force -Path $sessionDir | Out-Null') + ps_lines.append('$env:PYTHONIOENCODING = "utf-8"') + ps_lines.append('$env:PYTHONUTF8 = "1"') if req.hf_token: ps_lines.append(f"$env:HF_TOKEN = '{_ps_squote(req.hf_token)}'") if req.gpus: @@ -921,12 +970,14 @@ def setup_cookbook_routes() -> APIRouter: ps_lines.append('try { python -c "import llama_cpp" 2>$null } catch {}') ps_lines.append('if ($LASTEXITCODE -ne 0) {') ps_lines.append(' Write-Host "Installing llama-cpp-python..."') - ps_lines.append(' python -m pip install llama-cpp-python[server]') + ps_lines.append(' python -m pip install llama-cpp-python[server] --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu') ps_lines.append('}') elif "vllm" in req.cmd: ps_lines.append('Write-Host "ERROR: vLLM is not supported on Windows. 
Use Ollama or llama.cpp instead."') ps_lines.append('exit 1') ps_lines.append(req.cmd) + if is_pip_install: + ps_lines.append('if ($LASTEXITCODE -eq 0) { Write-Host ""; Write-Host "DOWNLOAD_OK" }') ps_lines.append('Write-Host ""') ps_lines.append('Write-Host "=== Process exited with code $LASTEXITCODE ==="') runner_path = TMUX_LOG_DIR / f"{session_id}_run.ps1" @@ -949,7 +1000,23 @@ def setup_cookbook_routes() -> APIRouter: else: # ── Linux/Termux: bash + tmux (existing flow) ── runner_lines = ["#!/bin/bash"] + # Mirror every line of stdout+stderr into a persistent log file + # on the host running the serve. This is the file tail_serve_output + # reads when the tmux pane has been overwritten by the post-crash + # bash prompt — without it, the agent's diagnostic tool sees the + # neofetch banner instead of the actual Python traceback. + # We save the original fds to 3/4 so we can RESTORE them before + # `exec ${SHELL}` at the end of the script. Without that restore, + # the post-crash interactive shell's neofetch banner ALSO gets + # teed into the log file and `tail -N` returns ONLY the banner — + # the actual traceback ends up earlier than the tail window. + runner_lines.append("mkdir -p /tmp/odysseus-tmux 2>/dev/null || true") + runner_lines.append("exec 3>&1 4>&2") + runner_lines.append( + f"exec > >(tee -a /tmp/odysseus-tmux/{session_id}.log) 2>&1" + ) runner_lines.extend(_user_shell_path_bootstrap()) + runner_lines.append('ODYSSEUS_PREFLIGHT_EXIT=""') # Put Odysseus's own venv bin on PATH (local runs only) so the serve # shell resolves the bundled python3/hf, mirroring the download flow. if not remote: @@ -966,6 +1033,7 @@ def setup_cookbook_routes() -> APIRouter: # Show whether the HF token reached this server (masked) — a gated # model vLLM has to download will be denied without it. 
runner_lines.append(_HF_TOKEN_STATUS_SNIPPET) + handled_ollama_serve = False # Auto-install inference engine if missing if "llama_cpp" in req.cmd or "llama-server" in req.cmd: # Prefer the NATIVE llama-server binary — its minja templating @@ -977,90 +1045,158 @@ def setup_cookbook_routes() -> APIRouter: # ollama is found (otherwise macOS falls back to a slow source build). # /opt/homebrew = Apple Silicon, /usr/local = Intel; harmless on Linux. runner_lines.append('export PATH="$HOME/.local/bin:$HOME/bin:$HOME/llama.cpp/build/bin:/opt/homebrew/bin:/usr/local/bin:$PATH"') - runner_lines.append('if [ -d /data/data/com.termux ]; then') - runner_lines.append(' # Termux: no native build — use the Python bindings (CPU).') - runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then') - runner_lines.append(' pkg install -y cmake 2>/dev/null') - runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') - runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true') - runner_lines.append(' fi') - runner_lines.append('elif ! command -v llama-server &>/dev/null; then') - runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') - runner_lines.append(' mkdir -p ~/bin') - runner_lines.append(' cd ~ && [ -d llama.cpp ] || git clone --depth 1 https://github.com/ggml-org/llama.cpp') - # Build with the right accelerator: Metal on macOS (llama.cpp - # enables it automatically, no flag), CUDA on Linux when present, - # else a plain CPU build. nproc is Linux-only — fall back to - # `sysctl hw.ncpu` on macOS. (Tip: `brew install llama.cpp` ships - # a prebuilt llama-server and skips this whole source build.) 
- runner_lines.append(' NPROC="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"') - runner_lines.append(' if [ "$(uname -s)" = "Darwin" ]; then') - runner_lines.append(' command -v cmake >/dev/null 2>&1 || echo "WARNING: cmake not found — install it with: brew install cmake (or: brew install llama.cpp for a prebuilt llama-server)."') - # Start from a clean cache: a prior failed configure (e.g. a CUDA - # attempt) poisons build/CMakeCache.txt, so a plain `cmake -B build` - # would reuse the bad settings and fail again. CMAKE_BUILD_TYPE is - # explicit so the binary is optimized (Metal auto-enables on macOS). - runner_lines.append(' cd ~/llama.cpp && rm -rf build && cmake -B build -DCMAKE_BUILD_TYPE=Release \\') - runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') - runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') - runner_lines.append(' else') - runner_lines.append(' cd ~/llama.cpp && { cmake -B build -DGGML_CUDA=ON 2>/dev/null || cmake -B build; } \\') - runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') - runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') - runner_lines.append(' fi') - runner_lines.append(' # If the native build failed, fall back to the Python bindings.') - runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') - runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') - runner_lines.append(' pip install --user --break-system-packages -q llama-cpp-python 2>/dev/null || pip install -q llama-cpp-python 2>/dev/null || true') - runner_lines.append(' fi') - runner_lines.append('fi') + if local_windows: + # LOCAL Windows: no native source compilation (no cmake/compiler on Git Bash). + # Just check python bindings (using native `python` binary) and fall back to pip install. 
+ runner_lines.append('if ! command -v llama-server &>/dev/null && ! python -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "llama-server not found — installing Python bindings..."') + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='python')} || true") + runner_lines.append('fi') + runner_lines.append('if ! command -v llama-server &>/dev/null && ! python -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install attempts."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('fi') + else: + runner_lines.append('if [ -d /data/data/com.termux ]; then') + runner_lines.append(' # Termux: no native build — use the Python bindings (CPU).') + runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' pkg install -y cmake 2>/dev/null') + runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') + runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true') + runner_lines.append(' fi') + runner_lines.append('elif ! command -v llama-server &>/dev/null; then') + runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') + runner_lines.append(' mkdir -p ~/bin') + runner_lines.append(' cd ~ && [ -d llama.cpp ] || git clone --depth 1 https://github.com/ggml-org/llama.cpp') + # Build with the right accelerator: Metal on macOS (llama.cpp + # enables it automatically, no flag), CUDA on Linux when present, + # else a plain CPU build. nproc is Linux-only — fall back to + # `sysctl hw.ncpu` on macOS. (Tip: `brew install llama.cpp` ships + # a prebuilt llama-server and skips this whole source build.) 
+ runner_lines.append(' NPROC="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"') + runner_lines.append(' if [ "$(uname -s)" = "Darwin" ]; then') + runner_lines.append(' command -v cmake >/dev/null 2>&1 || echo "WARNING: cmake not found — install it with: brew install cmake (or: brew install llama.cpp for a prebuilt llama-server)."') + # Start from a clean cache: a prior failed configure (e.g. a CUDA + # attempt) poisons build/CMakeCache.txt, so a plain `cmake -B build` + # would reuse the bad settings and fail again. CMAKE_BUILD_TYPE is + # explicit so the binary is optimized (Metal auto-enables on macOS). + runner_lines.append(' cd ~/llama.cpp && rm -rf build && cmake -B build -DCMAKE_BUILD_TYPE=Release \\') + runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') + runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' else') + _append_llama_cpp_linux_accel_build_lines(runner_lines) + runner_lines.append(' fi') + # If the native build failed, fall back to the Python bindings. + runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true") + runner_lines.append(' fi') + runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append(' fi') + runner_lines.append('fi') elif "ollama" in req.cmd: - # Ollama manages its own model store and HTTP server. 
Just make - # sure the binary exists and the daemon is up before running the - # command (the natural serving engine on Apple Silicon / Metal). + handled_ollama_serve = True + _ollama_default_host = "0.0.0.0" if remote else "127.0.0.1" + _ollama_host, _ollama_port = _ollama_bind_from_cmd( + req.cmd, + default_host=_ollama_default_host, + ) + # Always launch a fresh ollama under tmux so Stop reliably + # kills it. If the requested port is busy (e.g. a systemd + # ollama on 11434), scan upward for a free one rather than + # silently reattaching to an external service that Stop + # can't reach. + runner_lines.append(f'ODYSSEUS_OLLAMA_HOST={_bash_squote(_ollama_host)}') + runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"') + runner_lines.append('for _ody_off in 0 1 2 3 4 5 6 7 8 9; do') + runner_lines.append(' _ody_try_port=$((ODYSSEUS_OLLAMA_PORT + _ody_off))') + runner_lines.append(' if ! (exec 3<>/dev/tcp/127.0.0.1/$_ody_try_port) 2>/dev/null; then') + runner_lines.append(' exec 3<&-; exec 3>&-') + runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_try_port"') + runner_lines.append(' break') + runner_lines.append(' fi') + runner_lines.append(' echo "[odysseus] Ollama API ready on port ${ODYSSEUS_OLLAMA_PORT}: ${ODYSSEUS_OLLAMA_URL}"') + runner_lines.append(' echo "[odysseus] This task is monitoring an existing Ollama server; stopping it here will not stop an external Docker/system service."') + if local_windows: + # Windows detached process has no TTY; exec bash -i crashes. + # Keep the monitoring task alive with a sleep loop. + runner_lines.append(' while true; do sleep 60; done') + else: + runner_lines.append(' exec bash -i') + runner_lines.append('fi') runner_lines.append('if ! command -v ollama &>/dev/null; then') - runner_lines.append(' echo "ERROR: Ollama not found. Install it (macOS: brew install ollama, or https://ollama.com/download), then launch again."') - runner_lines.append(' exit 127') - runner_lines.append('fi') - runner_lines.append('if ! 
curl -sf http://localhost:11434/api/tags >/dev/null 2>&1; then') - runner_lines.append(' echo "Starting ollama server..."; (ollama serve >/dev/null 2>&1 &)') - runner_lines.append(' for _ in 1 2 3 4 5 6 7 8 9 10; do curl -sf http://localhost:11434/api/tags >/dev/null 2>&1 && break; sleep 1; done') + runner_lines.append(' echo "ERROR: Ollama not found on this server. Install it from https://ollama.com/download or `curl -fsSL https://ollama.com/install.sh | sh`."') + runner_lines.append(' echo') + runner_lines.append(' echo "=== Process exited with code 127 ==="') + if local_windows: + runner_lines.append(' exit 127') + else: + runner_lines.append(' exec bash -i') runner_lines.append('fi') + runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"') + if remote and _ollama_host in ("0.0.0.0", "::"): + runner_lines.append('echo "[odysseus] WARNING: remote Ollama will bind to ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT} so Odysseus can reach it from this host."') + runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."') + runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."') + runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve') + if local_windows: + _append_serve_exit_code_lines(runner_lines, keep_shell_open=False) + else: + runner_lines.append('_ody_exit=$?') + runner_lines.append('echo') + runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="') + runner_lines.append('exec bash -i') elif "vllm serve" in req.cmd: # vLLM is CUDA/ROCm-only and does not run on macOS at all. runner_lines.append('if [ "$(uname -s)" = "Darwin" ]; then') runner_lines.append(' echo "ERROR: vLLM does not run on macOS. 
Use Ollama or llama.cpp (Metal) instead."') - runner_lines.append(' exit 1') - runner_lines.append('fi') - # Put ~/.local/bin on PATH first — without a venv, vllm installs - # there via --user and the non-login serve shell otherwise can't - # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above. - runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! command -v vllm &>/dev/null; then') - runner_lines.append(' echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."') - runner_lines.append(' exit 127') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=1') runner_lines.append('fi') + _append_vllm_linux_preflight_lines(runner_lines) elif "sglang.launch_server" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then') - runner_lines.append(' echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."') - runner_lines.append(' exit 127') + runner_lines.append('if ! command -v sglang &>/dev/null; then') + runner_lines.append(' echo "ERROR: SGLang is not installed."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then') + runner_lines.append(' echo "ERROR: SGLang is installed but failed to import."') + runner_lines.append(' printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then') - runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers. 
Open Cookbook -> Dependencies and install diffusers on this server, then launch again."') - runner_lines.append(' exit 127') + runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then') + runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers."') + runner_lines.append(' printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') runner_lines.append('fi') - runner_lines.append(req.cmd) - if local_windows: - # Detached background process — no interactive shell to keep open. - # Print the exit marker the status poller looks for, then stop. - runner_lines.append('echo ""; echo "=== Process exited with code $? ==="') - else: - # Keep shell open after exit so user can see errors - runner_lines.append('echo ""; echo "=== Process exited with code $? ==="; exec "${SHELL:-/bin/bash}"') + if not handled_ollama_serve: + _append_serve_preflight_exit_lines( + runner_lines, + keep_shell_open=not local_windows, + ) + if is_pip_install: + _append_pip_install_runner_lines(runner_lines, req.cmd) + else: + runner_lines.append(req.cmd) + if local_windows: + # Detached background process — no interactive shell to keep open. + # Print the exit marker the status poller looks for, then stop. 
+ _append_serve_exit_code_lines( + runner_lines, + keep_shell_open=False, + is_pip_install=is_pip_install, + ) + else: + # Keep shell open after exit so user can see errors + _append_serve_exit_code_lines( + runner_lines, + keep_shell_open=True, + is_pip_install=is_pip_install, + ) runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8") @@ -1116,11 +1252,16 @@ def setup_cookbook_routes() -> APIRouter: stderr = (await proc.stderr.read()).decode(errors="replace") return {"ok": False, "error": stderr, "session_id": session_id} - # Auto-register as model endpoint if serving a diffusion model + # Auto-register a model endpoint so the served model shows up in the model + # picker with no manual /setup step. Diffusion models get an image + # endpoint; any other real model serve (i.e. not a pip-install task) gets + # a local LLM endpoint pointed at its /v1. endpoint_id = None is_diffusion = "diffusion_server.py" in req.cmd if is_diffusion: endpoint_id = _auto_register_image_endpoint(req, remote) + elif not is_pip_install: + endpoint_id = _auto_register_llm_endpoint(req, remote) # Log to assistant try: @@ -1201,8 +1342,8 @@ def setup_cookbook_routes() -> APIRouter: cmd = f"ssh {pf}{host} '{setup_script}'" else: # Linux: auto-install tmux (via whichever package manager is available) - # and huggingface_hub + hf_transfer (falling back to --user/--break-system-packages - # on PEP-668 locked distros like Arch / newer Debian). + # and huggingface_hub + hf_transfer (falling back to --user, then + # guarded --break-system-packages on PEP-668 locked distros). setup_script = ( # Install tmux if missing — try common package managers; skip if no sudo "if ! command -v tmux >/dev/null 2>&1; then " @@ -1214,10 +1355,15 @@ def setup_cookbook_routes() -> APIRouter: " fi; " "fi; " "command -v tmux >/dev/null 2>&1 || echo 'WARNING: tmux missing and auto-install failed (need passwordless sudo). 
Install manually.'; " - # Install Python bits. Try system install first; fall back to --user --break-system-packages on PEP 668 systems. + # Install Python bits. Try system install first; fall back to --user, + # then use --break-system-packages only when pip supports it. "pip install -q huggingface_hub hf_transfer 2>/dev/null || " - "pip install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null || " - "pip3 install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null; " + "pip install --user -q huggingface_hub hf_transfer 2>/dev/null || " + "( pip install --help 2>/dev/null | grep -q -- --break-system-packages && " + "pip install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null ) || " + "pip3 install --user -q huggingface_hub hf_transfer 2>/dev/null || " + "( pip3 install --help 2>/dev/null | grep -q -- --break-system-packages && " + "pip3 install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null ); " "python3 -c 'from huggingface_hub import snapshot_download; print(\"OK\")'" ) cmd = f"ssh {pf}{host} '{setup_script}'" @@ -1240,11 +1386,38 @@ def setup_cookbook_routes() -> APIRouter: async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8): """Run nvidia-smi locally or over SSH. 
Returns (stdout, error_or_None).""" if host: - pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" - cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'" - proc = await asyncio.create_subprocess_shell( - cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE - ) + candidates = [query] + stripped = query.strip() + if stripped.startswith("nvidia-smi "): + args = stripped[len("nvidia-smi "):] + candidates.append( + "bash -lc " + + shlex.quote( + f"{SSH_PATH_OVERRIDE}" + f"nvidia-smi {args}" + ) + ) + for nvidia_path in NVIDIA_PATH_CANDIDATES: + candidates.append(f"{nvidia_path} {args}") + + last_err = "nvidia-smi failed" + for candidate in candidates: + try: + rc, stdout, stderr = await run_ssh_command_async( + host, + ssh_port, + candidate, + connect_timeout=5, + timeout=timeout, + ) + except asyncio.TimeoutError: + return None, "nvidia-smi timed out" + if rc == 0: + return stdout.decode("utf-8", errors="replace"), None + err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200] + if err: + last_err = err + return None, last_err else: proc = await asyncio.create_subprocess_exec( *shlex.split(query), @@ -1357,9 +1530,16 @@ def setup_cookbook_routes() -> APIRouter: total_mb = max(0, int(total_bytes / (1024 * 1024))) used_mb = max(0, min(total_mb, int(used_bytes / (1024 * 1024)))) free_mb = max(0, total_mb - used_mb) + # GTT = the system-RAM pool the GPU pages into when VRAM is full. + # On a discrete card a large gtt_used means the model spilled past + # VRAM into RAM over PCIe — much slower. Surface it so the UI can + # warn "spilling to RAM" instead of the user wondering why it's slow. 
+ gtt_used_raw = await _gpu_read_file(f"{base}/mem_info_gtt_used", host, ssh_port) + gtt_used_mb = max(0, int(int(gtt_used_raw) / (1024 * 1024))) if (gtt_used_raw and gtt_used_raw.isdigit()) else 0 gpus.append({ "index": len(gpus), "name": name, "uuid": entry, "free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb, + "gtt_used_mb": gtt_used_mb, "util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85), "processes": [], "backend": "rocm", "source": "amd-sysfs", "unified_memory": unified, @@ -1461,6 +1641,46 @@ def setup_cookbook_routes() -> APIRouter: if gpus: return {"ok": True, "gpus": gpus, "backend": "cuda", "source": "nvidia-smi"} + # Local Apple Silicon / Metal fallback. macOS has no nvidia-smi and no + # Linux /sys/class/drm tree, but services.hwfit.hardware already knows + # how to size the shared unified-memory GPU budget. Keep this route in + # sync so Cookbook's GPU picker doesn't show "nvidia-smi not found" on + # native Mac launches. + if not host and sys.platform == "darwin": + try: + from services.hwfit.hardware import detect_system + info = detect_system(fresh=True) + backend = str(info.get("backend") or "").lower() + if backend in {"metal", "mps", "apple"} and info.get("gpu_count", 0) > 0: + total_mb = int(float(info.get("gpu_vram_gb") or info.get("total_ram_gb") or 0) * 1024) + free_mb = int(float(info.get("available_ram_gb") or 0) * 1024) + if total_mb and (free_mb <= 0 or free_mb > total_mb): + free_mb = total_mb + used_mb = max(0, total_mb - max(0, free_mb)) + return { + "ok": True, + "gpus": [{ + "index": 0, + "name": info.get("gpu_name") or info.get("cpu_name") or "Apple Silicon GPU", + "uuid": "apple-metal-0", + "free_mb": max(0, free_mb), + "total_mb": max(0, total_mb), + "used_mb": used_mb, + "util_pct": 0, + "busy": bool(total_mb and (free_mb / total_mb) < 0.5), + "processes": [], + "backend": "metal", + "source": "apple-metal", + "unified_memory": True, + }], + "backend": "metal", + "source": "apple-metal", + 
"fallback_from": "nvidia-smi", + "nvidia_error": nvidia_error, + } + except Exception as e: + logger.warning("Apple Metal GPU fallback failed: %s", e) + amd_gpus = await _probe_amd_sysfs(host, ssh_port) if amd_gpus: return { @@ -1607,6 +1827,33 @@ def setup_cookbook_routes() -> APIRouter: disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else [] incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else [] + # Anti-poisoning guard: a stale browser tab can keep POSTing a + # download task as status='done' from before the strict-finish + # fix landed, undoing any server-side correction. For each + # incoming "done" download, override to "running" if the last + # shard pattern says N _completed: + logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} " + f"({_completed}/{_starts} files complete, no DOWNLOAD_OK)") + _it["status"] = "running" incoming_ids = {t.get("sessionId") for t in incoming_tasks if isinstance(t, dict) and t.get("sessionId")} import time as _t now_ms = int(_t.time() * 1000) @@ -1725,10 +1972,14 @@ def setup_cookbook_routes() -> APIRouter: if vram_gb > 0 and needed_vram is not None and needed_vram > vram_gb: continue - # Skip if no size info — without a size we can't tell if it's a real - # full-weight model or a tiny adapter, so we'd rather drop it - if est_vram is None: - continue + # Unknown-size models (e.g. MiniMax-M2.7, DeepSeek-V4-Flash) have no + # "NB" in the repo id, so the regex above can't extract their + # param count. Previously we dropped them entirely, which made + # brand-new flagship releases silently vanish from this list even + # on rigs with hundreds of GB of VRAM. Adapters/LoRAs are already + # filtered by _is_excluded(), so what falls through here is + # overwhelmingly full models — keep them, just without a size + # badge (the frontend handles needed_vram_gb=null gracefully). 
out.append({ "repo_id": repo_id, @@ -1745,6 +1996,153 @@ def setup_cookbook_routes() -> APIRouter: return {"models": out} + # Rate-limit for the orphan-tmux adoption sweep. The UI polls + # tasks/status every ~3s; we don't want to SSH every host on every + # poll. 20s is fast enough that a model the agent launched in the + # background shows up "almost immediately" in the UI without being + # wasteful. + _last_orphan_sweep_ts = [0.0] + _ORPHAN_SWEEP_MIN_INTERVAL_S = 20.0 + + def _maybe_sweep_orphans(tasks: list, state: dict) -> None: + """Scan each configured cookbook server for `serve-*` tmux sessions + the cookbook doesn't know about and adopt them into state.tasks. + + Writes are conditional: if no orphans are found, nothing is touched. + Rate-limited so polling UIs don't trigger SSH on every refresh. + """ + import time as _time + import subprocess + logger.info(f"_maybe_sweep_orphans: entered, last_ts={_last_orphan_sweep_ts[0]}") + now = _time.monotonic() + if now - _last_orphan_sweep_ts[0] < _ORPHAN_SWEEP_MIN_INTERVAL_S: + logger.info(f"_maybe_sweep_orphans: rate-limited, {now - _last_orphan_sweep_ts[0]:.1f}s since last") + return + _last_orphan_sweep_ts[0] = now + + env = state.get("env") if isinstance(state, dict) else {} + servers = env.get("servers") if isinstance(env, dict) else [] + logger.info(f"orphan sweep starting: {len(servers) if isinstance(servers, list) else 0} server(s), known_sids={len([t for t in tasks if isinstance(t, dict) and t.get('sessionId')])}") + if not isinstance(servers, list): + return + + known_sids = { + t.get("sessionId") for t in tasks + if isinstance(t, dict) and t.get("sessionId") + } + + adopted_any = False + for srv in servers: + if not isinstance(srv, dict): + continue + host = (srv.get("host") or "").strip() + if not host: + continue # local-only entry; the /proc scan handles it + if not _REMOTE_HOST_RE.match(host): + continue + sport = str(srv.get("port") or "").strip() + ssh_base = ["ssh", "-o", "ConnectTimeout=4", 
"-o", "StrictHostKeyChecking=no"] + if sport and sport != "22": + if not _SSH_PORT_RE.match(sport): + continue + ssh_base.extend(["-p", sport]) + + try: + ls = subprocess.run( + ssh_base + [host, "tmux ls 2>/dev/null"], + timeout=6, capture_output=True, text=True, + ) + except Exception: + continue + for line in (ls.stdout or "").splitlines(): + sid = line.split(":", 1)[0].strip() + if not sid or not _SESSION_ID_RE.match(sid): + continue + if sid in known_sids: + continue + # Adopt any session whose pane is currently running a + # known model-server process (checked below). The earlier + # prefix gate (serve-/cookbook-) dropped legitimate + # serves whenever tmux fell back to numeric IDs, leaving + # them invisible in the Cookbook UI — so the user could + # neither see nor stop them. + # Skip zombie / idle-shell sessions. A tmux session left + # over from a crashed vllm just shows a bash prompt — + # adopting it would pollute the UI with "running" tasks + # that aren't actually serving anything. pane_current_command + # is the foreground process in the pane right now; only + # real model serves leave a python/vllm/etc. process there. + try: + pc = subprocess.run( + ssh_base + [host, "tmux", "list-panes", "-t", sid, + "-F", "#{pane_current_command}"], + timeout=4, capture_output=True, text=True, + ) + cur = (pc.stdout or "").strip().splitlines() + except Exception: + cur = [] + LIVE_PROCS = {"python", "python3", "vllm", "llama-server", + "llama_cpp_main", "sglang", "lmdeploy", + "ollama", "node", "uvicorn"} + if not any(c in LIVE_PROCS for c in cur): + continue + # Try to recover a plausible repo_id + port from the + # pane buffer. Cheap heuristic — if we can't, register + # with placeholder fields; the UI still shows it. 
+ try: + cap = subprocess.run( + ssh_base + [host, "tmux", "capture-pane", "-t", sid, "-p", "-S", "-300"], + timeout=6, capture_output=True, text=True, + ) + pane = cap.stdout or "" + except Exception: + pane = "" + import re as _re_orphan + # vLLM banner: "model /path/...". Falls back to the + # raw vllm-serve command if the banner already scrolled. + m_model = _re_orphan.search(r"model\s+(\S+)", pane) + model = m_model.group(1) if m_model else "" + if not model: + m_serve = _re_orphan.search(r"vllm\s+serve\s+(\S+)", pane) + model = m_serve.group(1) if m_serve else f"adopted:{sid}" + m_port = _re_orphan.search(r"--port\s+(\d+)", pane) + port = int(m_port.group(1)) if m_port else 0 + + import time as _t2 + tasks.append({ + "id": sid, + "sessionId": sid, + "name": model.split("/")[-1] if "/" in model else model, + "type": "serve", + "status": "running", + "output": f"Auto-adopted from orphan tmux session on {host}. " + "Open the task to see live output.", + "ts": int(_t2.time() * 1000), + "payload": { + "repo_id": model, + "remote_host": host, + "_cmd": "(orphan tmux session — original launch cmd unknown)", + "port": port, + }, + "remoteHost": host, + "sshPort": sport, + "platform": "linux", + "_serveReady": False, + "_endpointAdded": False, + "_adoptedExternally": True, + }) + known_sids.add(sid) + adopted_any = True + logger.info(f"auto-adopted orphan tmux session {sid!r} on {host}") + + if adopted_any: + try: + from core.atomic_io import atomic_write_json + state["tasks"] = tasks + atomic_write_json(_cookbook_state_path, state) + except Exception as e: + logger.warning(f"orphan sweep: state write failed: {e}") + @router.get("/api/cookbook/tasks/status") async def cookbook_tasks_status(request: Request): """Check status of all active cookbook tmux sessions. 
@@ -1759,8 +2157,52 @@ def setup_cookbook_routes() -> APIRouter: def _cookbook_tasks_status_sync(): import subprocess + def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool: + """Best-effort check for a completed HF cache entry. + + tmux output can stop at a stale progress line if the pane/session + disappears before Cookbook captures the final DOWNLOAD_OK marker. + In that case, trust the cache shape: a snapshot directory with files + and no *.incomplete blobs means HuggingFace finished materializing the + model. + """ + if not repo_id or "/" not in repo_id: + return False + py = ( + "import os,sys;" + "repo=sys.argv[1];" + "base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');" + "d=os.path.join(base,'models--'+repo.replace('/','--'));" + "snap=os.path.join(d,'snapshots');" + "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));" + "inc=False;" + "blobs=os.path.join(d,'blobs');" + "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));" + "sys.exit(0 if ok and not inc else 1)" + ) + if remote_host: + cmd = ["python3", "-c", py, repo_id] + else: + # Local Windows: python3 can hit the Microsoft Store stub. Use the + # real Python Odysseus is running under (guaranteed to exist). 
+ import sys as _sys_local + cmd = [_sys_local.executable, "-c", py, repo_id] + try: + if remote_host: + ssh_base = ["ssh"] + if ssh_port and ssh_port != "22": + ssh_base.extend(["-p", str(ssh_port)]) + shell_cmd = " ".join(shlex.quote(x) for x in cmd) + proc = subprocess.run(ssh_base + [remote_host, shell_cmd], timeout=12, capture_output=True) + else: + proc = subprocess.run(cmd, timeout=12, capture_output=True) + return proc.returncode == 0 + except Exception: + return False + # Load saved tasks from cookbook state tasks = [] + state = {} if _cookbook_state_path.exists(): try: state = json.loads(_cookbook_state_path.read_text(encoding="utf-8")) @@ -1772,6 +2214,21 @@ def setup_cookbook_routes() -> APIRouter: except Exception: pass + # Orphan-tmux auto-adoption sweep. When the agent (or anyone) + # SSH-launches a `serve-*` tmux session — usually because + # serve_model rejected `source ... && vllm ...` or because of a + # manual relaunch via tmux send-keys — that session is invisible + # to the cookbook UI even though it's a live model server. The + # sweep finds those orphans on each configured remote host and + # writes them into state.tasks with _adoptedExternally=True, so + # they show up in the UI on the next poll without anyone having + # to remember to call adopt_served_model. Rate-limited via the + # module-level _last_orphan_sweep so we don't SSH every 3s. 
+ try: + _maybe_sweep_orphans(tasks, state) + except Exception as _sweep_e: + logger.warning(f"orphan sweep failed (non-fatal): {_sweep_e!r}") + results = [] for task in tasks: session_id = task.get("sessionId", "") @@ -1831,7 +2288,12 @@ def setup_cookbook_routes() -> APIRouter: if _tport and _tport != "22": ssh_base.extend(["-p", str(_tport)]) check_cmd = ssh_base + [remote, "tmux", "has-session", "-t", session_id] - capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"] + # Capture 500 lines (was 50) so a Python traceback survives + # the post-crash neofetch banner + bash prompt that otherwise + # fills the visible tail. Without this, output_tail ends up + # as just "Locale: C / Ubuntu_Odysseus ❯" and the agent + # can't diagnose the actual error. + capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-500"] elif IS_WINDOWS: # LOCAL Windows task: launched as a detached process (no tmux). # Liveness comes from the .pid file, output from the @@ -1840,7 +2302,7 @@ def setup_cookbook_routes() -> APIRouter: capture_cmd = None else: check_cmd = ["tmux", "has-session", "-t", session_id] - capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"] + capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-500"] local_win_task = (not remote) and IS_WINDOWS @@ -1898,14 +2360,21 @@ def setup_cookbook_routes() -> APIRouter: # persists after the process exits, so a finished download still has a # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even # when the PID is gone instead of blindly reporting "stopped". 
+ download_zero_files = False status = "unknown" if is_alive or (local_win_task and full_snapshot): lower = full_snapshot.lower() - has_exit = "=== process exited with code" in lower + exit_match = re.search(r"=== process exited with code\s+(-?\d+)", full_snapshot, re.I) + has_exit = exit_match is not None + exit_code = int(exit_match.group(1)) if exit_match else None has_error = "error" in lower or "failed" in lower or "traceback" in lower if has_exit and task_type == "serve": # Serve tasks that exit are always errors — they should run indefinitely status = "error" + elif has_exit and task_type == "download": + # Dependency installs are tracked as download tasks but only + # emit the generic runner exit marker, not HF download markers. + status = "completed" if exit_code == 0 else "error" elif has_exit and "unrecognized arguments" in lower: status = "error" elif has_error and not ("application startup complete" in lower): @@ -1914,7 +2383,11 @@ def setup_cookbook_routes() -> APIRouter: # Only download tasks treat 100% as "completed". # Serve tasks log 100%|██████| during inference progress # (diffusion sampling, etc.) — that's "running", not done. 
- status = "completed" + if re.search(r"Fetching\s+0\s+files", full_snapshot, re.IGNORECASE): + status = "error" + download_zero_files = True + else: + status = "completed" elif "application startup complete" in lower: status = "ready" elif not is_alive: @@ -1924,7 +2397,14 @@ def setup_cookbook_routes() -> APIRouter: status = "running" else: # Session is dead — check if it completed or crashed - status = "stopped" + if task_type == "download" and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or "")): + status = "completed" + if not progress_text: + progress_text = "Download complete" + if not full_snapshot: + full_snapshot = "DOWNLOAD_OK" + else: + status = "stopped" # Parse structured phase info — single source of truth for the UI phase_info = _parse_serve_phase(full_snapshot, task_type) if (task_type == "serve" and status == "running" and full_snapshot) else {} @@ -1934,6 +2414,8 @@ def setup_cookbook_routes() -> APIRouter: diagnosis = _diagnose_serve_output(full_snapshot) if task_type == "serve" and full_snapshot else None if diagnosis and status in {"running", "unknown", "stopped"}: status = "error" + if download_zero_files: + diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."} output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else "" results.append({ diff --git a/routes/copilot_routes.py b/routes/copilot_routes.py new file mode 100644 index 000000000..1d8be52ce --- /dev/null +++ b/routes/copilot_routes.py @@ -0,0 +1,173 @@ +# routes/copilot_routes.py +"""GitHub Copilot device-flow login. + +Drives the GitHub OAuth *device flow* and, on success, creates (or refreshes) +an owner-scoped ``ModelEndpoint`` pointing at the Copilot API with the +device-flow access token stored as its (encrypted) ``api_key``. 
After that the +endpoint behaves like any other OpenAI-compatible provider — the Copilot- +specific request headers are injected centrally by ``build_headers`` / +``_provider_headers`` (see :mod:`src.copilot`). + +Flow: + 1. ``POST /api/copilot/device/start`` → returns a ``poll_id`` plus the + ``user_code`` + ``verification_uri`` to show the user. The secret + ``device_code`` is kept server-side, never sent to the browser. + 2. The browser polls ``POST /api/copilot/device/poll`` with ``poll_id``. + While pending it returns ``{status: "pending"}``; once the user authorises + it provisions the endpoint and returns ``{status: "authorized", ...}``. + +All routes are admin-gated (endpoint/provider management is an admin action). +""" + +import json +import uuid +import logging +from typing import Dict, Optional + +import httpx +from fastapi import HTTPException, Request + +from core.database import SessionLocal, ModelEndpoint +from routes.device_flow import ( + DeviceFlowPoll, + DeviceFlowStart, + PendingDeviceFlowStore, + create_device_flow_router, +) +from src.auth_helpers import get_current_user +from src import copilot + +logger = logging.getLogger(__name__) + +_DEVICE_FLOW_STORE = PendingDeviceFlowStore() + + +def _provision_endpoint(token: str, base: str, owner: Optional[str]) -> Dict: + """Create or update the owner's Copilot endpoint with a fresh token.""" + try: + models = copilot.fetch_models(base, token) + except Exception as e: + logger.warning(f"Copilot model fetch failed during provisioning: {e}") + models = [] + model_ids = [m["id"] for m in models] + # Copilot picker models support OpenAI-style tool calling; mark the endpoint + # tool-capable so the agent loop sends native tool schemas. + # Tool-capable if any picker model advertises tool_calls. When the model + # fetch failed (empty list) default to True, since Copilot picker models + # support OpenAI-style tool calling. 
+ supports_tools = bool(not models or any(m.get("tool_calls") for m in models)) + + db = SessionLocal() + try: + ep = ( + db.query(ModelEndpoint) + .filter(ModelEndpoint.base_url == base) + .filter((ModelEndpoint.owner.is_(None)) | (ModelEndpoint.owner == owner)) + .order_by(ModelEndpoint.owner.desc()) + .first() + ) + if ep is None: + ep = ModelEndpoint( + id=str(uuid.uuid4())[:8], + name="GitHub Copilot", + base_url=base, + model_type="llm", + owner=owner, + ) + db.add(ep) + ep.api_key = token + ep.is_enabled = True + ep.supports_tools = supports_tools + if model_ids: + ep.cached_models = json.dumps(model_ids) + db.commit() + result = { + "id": ep.id, + "name": ep.name, + "base_url": ep.base_url, + "models": model_ids, + } + finally: + db.close() + + # Best-effort: refresh the model cache so the new endpoint shows up. + try: + from routes.model_routes import _invalidate_models_cache + _invalidate_models_cache() + except Exception: + pass + return result + + +def _start_device_flow(request: Request, form) -> DeviceFlowStart: + host = copilot.GITHUB_HOST + ent = str(form.get("enterprise_url") or "").strip() + if ent: + host = copilot.normalize_domain(ent) + try: + data = copilot.request_device_code(host) + except httpx.HTTPStatusError as e: + status = e.response.status_code if e.response is not None else "unknown" + raise HTTPException(502, f"GitHub device-code request failed (HTTP {status})") + except Exception as e: + raise HTTPException(502, f"GitHub device-code request failed: {e}") + + device_code = data.get("device_code") + if not device_code: + raise HTTPException(502, "GitHub did not return a device code") + + # verification_uri_complete embeds the user code, so the browser tab we + # open lands the user straight on GitHub's "Authorize" screen with the + # code pre-filled — one click, no manual code entry. 
+ return DeviceFlowStart( + pending={ + "device_code": device_code, + "host": host, + "enterprise_url": ent, + "owner": get_current_user(request) or None, + }, + response={ + "user_code": data.get("user_code"), + "verification_uri": data.get("verification_uri"), + "verification_uri_complete": data.get("verification_uri_complete"), + }, + interval=int(data.get("interval") or 5), + expires_in=int(data.get("expires_in") or 900), + ) + + +def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll: + try: + data = copilot.poll_access_token(pending["host"], pending["device_code"]) + except Exception as e: + return DeviceFlowPoll.pending(f"poll error: {e}") + + token = data.get("access_token") + if token: + base = copilot.enterprise_base(pending["enterprise_url"]) if pending["enterprise_url"] else copilot.COPILOT_BASE + try: + result = _provision_endpoint(token, base, pending["owner"]) + except Exception as e: + logger.exception("Copilot endpoint provisioning failed") + raise HTTPException(500, f"Login succeeded but provisioning failed: {e}") + return DeviceFlowPoll.authorized(result) + + err = data.get("error") + if err == "authorization_pending": + return DeviceFlowPoll.pending() + if err == "slow_down": + return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None) + if err in ("expired_token", "access_denied"): + return DeviceFlowPoll.failed(err) + # Unknown error — surface but keep the session for another try. 
+ return DeviceFlowPoll.pending(err or "unknown") + + +def setup_copilot_routes(): + return create_device_flow_router( + prefix="/api/copilot", + tags=["copilot"], + store=_DEVICE_FLOW_STORE, + start_flow=_start_device_flow, + poll_flow=_poll_device_flow, + ) diff --git a/routes/device_flow.py b/routes/device_flow.py new file mode 100644 index 000000000..8b8ab4ac8 --- /dev/null +++ b/routes/device_flow.py @@ -0,0 +1,193 @@ +"""Shared OAuth/device-flow route scaffolding for provider setup.""" + +from __future__ import annotations + +import inspect +import threading +import time +import uuid +from dataclasses import dataclass +from typing import Any, Callable, Iterable, Mapping, Optional + +from fastapi import APIRouter, Form, HTTPException, Request + +from core.middleware import require_admin + + +@dataclass(frozen=True) +class DeviceFlowStart: + """Provider-specific start result consumed by the shared route wrapper.""" + + pending: Mapping[str, Any] + response: Mapping[str, Any] + interval: int = 5 + expires_in: int = 900 + + +@dataclass(frozen=True) +class DeviceFlowPoll: + """Normalized provider poll outcome.""" + + status: str + endpoint: Optional[Mapping[str, Any]] = None + error: Optional[str] = None + detail: Optional[str] = None + interval: Optional[int] = None + + @classmethod + def pending(cls, detail: Optional[str] = None) -> "DeviceFlowPoll": + return cls(status="pending", detail=detail) + + @classmethod + def slow_down(cls, interval: Optional[int] = None, detail: Optional[str] = None) -> "DeviceFlowPoll": + return cls(status="slow_down", interval=interval, detail=detail) + + @classmethod + def authorized(cls, endpoint: Mapping[str, Any]) -> "DeviceFlowPoll": + return cls(status="authorized", endpoint=endpoint) + + @classmethod + def failed(cls, error: str) -> "DeviceFlowPoll": + return cls(status="failed", error=error) + + +class PendingDeviceFlowStore: + """Thread-safe in-memory pending device-flow store. 
+ + Device codes and provider-side secrets stay inside this process. Each entry + stores provider payload separately from poll metadata so provider callbacks + only receive the fields they created. + """ + + def __init__(self, *, time_func: Callable[[], float] = time.time): + self._pending: dict[str, dict[str, Any]] = {} + self._lock = threading.Lock() + self._time = time_func + + def _now(self) -> float: + return float(self._time()) + + def prune_expired(self) -> None: + now = self._now() + with self._lock: + for key in [k for k, v in self._pending.items() if v.get("expires_at", 0) < now]: + self._pending.pop(key, None) + + def add(self, payload: Mapping[str, Any], *, interval: int, expires_in: int) -> str: + self.prune_expired() + poll_id = uuid.uuid4().hex + with self._lock: + self._pending[poll_id] = { + "payload": dict(payload), + "interval": max(int(interval or 5), 1), + "expires_at": self._now() + max(int(expires_in or 900), 1), + "next_poll_at": 0.0, + } + return poll_id + + def get_payload(self, poll_id: str) -> Optional[dict[str, Any]]: + self.prune_expired() + with self._lock: + entry = self._pending.get(poll_id) + if entry is None: + return None + return dict(entry.get("payload") or {}) + + def is_throttled(self, poll_id: str) -> bool: + with self._lock: + entry = self._pending.get(poll_id) + return bool(entry and self._now() < float(entry.get("next_poll_at") or 0)) + + def schedule_next(self, poll_id: str) -> None: + now = self._now() + with self._lock: + entry = self._pending.get(poll_id) + if entry is not None: + entry["next_poll_at"] = now + int(entry.get("interval") or 5) + + def slow_down(self, poll_id: str, interval: Optional[int] = None) -> None: + now = self._now() + with self._lock: + entry = self._pending.get(poll_id) + if entry is not None: + new_interval = int(interval or (int(entry.get("interval") or 5) + 5)) + entry["interval"] = max(new_interval, 1) + entry["next_poll_at"] = now + entry["interval"] + + def pop(self, poll_id: str) -> 
None: + with self._lock: + self._pending.pop(poll_id, None) + + +async def _maybe_await(value: Any) -> Any: + if inspect.isawaitable(value): + return await value + return value + + +def _pending_response(detail: Optional[str] = None) -> dict[str, Any]: + response: dict[str, Any] = {"status": "pending"} + if detail: + response["detail"] = detail + return response + + +def create_device_flow_router( + *, + prefix: str, + tags: Iterable[str], + store: PendingDeviceFlowStore, + start_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowStart], + poll_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowPoll], +) -> APIRouter: + """Create standard `/device/start|poll|cancel` routes for a provider.""" + + router = APIRouter(prefix=prefix, tags=list(tags)) + + @router.post("/device/start") + async def device_start(request: Request): + require_admin(request) + form = await request.form() + start = await _maybe_await(start_flow(request, form)) + interval = int(start.interval or 5) + expires_in = int(start.expires_in or 900) + poll_id = store.add(start.pending, interval=interval, expires_in=expires_in) + response = dict(start.response) + response.update({"poll_id": poll_id, "interval": interval, "expires_in": expires_in}) + return response + + @router.post("/device/poll") + async def device_poll(request: Request, poll_id: str = Form(...)): + require_admin(request) + payload = store.get_payload(poll_id) + if payload is None: + raise HTTPException(404, "Unknown or expired login session") + if store.is_throttled(poll_id): + return {"status": "pending"} + + try: + outcome = await _maybe_await(poll_flow(request, payload)) + except Exception: + store.pop(poll_id) + raise + + if outcome.status == "authorized": + store.pop(poll_id) + return {"status": "authorized", "endpoint": dict(outcome.endpoint or {})} + if outcome.status == "failed": + store.pop(poll_id) + return {"status": "failed", "error": outcome.error or "denied"} + if outcome.status == "slow_down": + 
store.slow_down(poll_id, outcome.interval) + return _pending_response(outcome.detail) + + store.schedule_next(poll_id) + return _pending_response(outcome.detail) + + @router.post("/device/cancel") + def device_cancel(request: Request, poll_id: str = Form(...)): + require_admin(request) + store.pop(poll_id) + return {"status": "cancelled"} + + return router diff --git a/routes/diagnostics_routes.py b/routes/diagnostics_routes.py index 8f3a915c2..daebef8d2 100644 --- a/routes/diagnostics_routes.py +++ b/routes/diagnostics_routes.py @@ -3,10 +3,11 @@ import logging from typing import Dict, Any -from fastapi import APIRouter, HTTPException, Form +from fastapi import APIRouter, HTTPException, Form, Request from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async from core.constants import DEFAULT_HOST +from core.middleware import require_admin logger = logging.getLogger(__name__) @@ -19,7 +20,8 @@ def setup_diagnostics_routes( router = APIRouter(tags=["diagnostics"]) @router.get("/api/db/stats") - async def get_database_stats() -> Dict[str, Any]: + async def get_database_stats(request: Request) -> Dict[str, Any]: + require_admin(request) try: from core.database import get_detailed_stats return get_detailed_stats() @@ -28,13 +30,15 @@ def setup_diagnostics_routes( raise HTTPException(500, "Failed to retrieve database statistics") @router.get("/api/rag/stats") - async def get_rag_stats() -> Dict[str, Any]: + async def get_rag_stats(request: Request) -> Dict[str, Any]: + require_admin(request) if rag_available and rag_manager: return rag_manager.get_stats() return {"error": "RAG system not available"} @router.get("/api/test/youtube") - async def test_youtube(url: str) -> Dict[str, Any]: + async def test_youtube(request: Request, url: str) -> Dict[str, Any]: + require_admin(request) try: video_id = extract_youtube_id(url) if not video_id: @@ -54,7 +58,8 @@ def setup_diagnostics_routes( return {"error": str(e)} 
@router.post("/api/test-research") - async def test_research(query: str = Form("What is machine learning?")) -> Dict[str, Any]: + async def test_research(request: Request, query: str = Form("What is machine learning?")) -> Dict[str, Any]: + require_admin(request) try: endpoint = f"http://{DEFAULT_HOST}:8000/v1/chat/completions" model = "gpt-oss-120b" diff --git a/routes/document_helpers.py b/routes/document_helpers.py index ace4cad54..57acc50e7 100644 --- a/routes/document_helpers.py +++ b/routes/document_helpers.py @@ -5,16 +5,16 @@ import logging import os import re -from typing import Dict, Any, Optional +from typing import Any, Dict, Optional -from fastapi import HTTPException +from fastapi import HTTPException, Request from pydantic import BaseModel from core.database import Document, DocumentVersion from core.database import Session as DbSession +from src.upload_handler import UploadHandler logger = logging.getLogger(__name__) -_UPLOAD_ID_RE = re.compile(r"^[0-9a-fA-F]{32}\.[A-Za-z0-9]+$") # ---- Request schemas ---- @@ -138,83 +138,73 @@ def _upload_path_inside(upload_dir: str, path: str) -> bool: return False -def _upload_owner_allowed( - meta: Optional[dict], - user: Optional[str], +def _resolve_user_upload_path( + upload_handler: Any, + upload_id: str, + owner: Optional[str], auth_manager=None, - allow_admin: bool = True, -) -> bool: - if not user: - return ( - not bool(auth_manager and getattr(auth_manager, "is_configured", False)) - and not (meta and meta.get("owner") is not None) +) -> Optional[str]: + """Resolve an upload id to a filesystem path the caller may read.""" + if upload_handler is None: + return None + resolved = upload_handler.resolve_upload( + upload_id, + owner=owner, + auth_manager=auth_manager, + ) + if not isinstance(resolved, dict) or not resolved: + return None + path = resolved.get("path") + upload_dir = getattr(upload_handler, "upload_dir", None) + if path and upload_dir and not _upload_path_inside(upload_dir, path): + 
logger.warning("Upload path outside upload directory: %s", path) + return None + return path + + +def _locate_upload( + upload_dir: str, + file_id: str, + owner: Optional[str] = None, + auth_manager=None, + upload_handler: Any = None, +): + """Find an upload by its filename ID via UploadHandler.resolve_upload.""" + if upload_handler is None: + from src.upload_handler import UploadHandler + + base_dir = os.path.dirname(os.path.abspath(upload_dir)) + upload_handler = UploadHandler(base_dir, upload_dir) + return _resolve_user_upload_path(upload_handler, file_id, owner, auth_manager) + + +def _assert_pdf_marker_upload_owned( + request: Request, + content: str, + user: Optional[str], + upload_handler: Any, +) -> None: + """Reject document content whose pdf_source marker points at another user's upload.""" + if upload_handler is None: + return + from src.pdf_form_doc import find_source_upload_id + + upload_id = find_source_upload_id(content or "") + if not upload_id: + return + auth_manager = getattr(getattr(request.app, "state", None), "auth_manager", None) + if not _resolve_user_upload_path(upload_handler, upload_id, user, auth_manager): + raise HTTPException( + 400, + "Document PDF marker references an upload you do not own", ) - if allow_admin and auth_manager and hasattr(auth_manager, "is_admin"): - try: - if auth_manager.is_admin(user): - return True - except Exception: - pass - return bool(meta and meta.get("owner") == user) - - -def _locate_upload(upload_dir: str, file_id: str, owner: Optional[str] = None, auth_manager=None): - """Find an upload by its filename ID. - - Lookup order: - 1. The `uploads.json` index that `UploadHandler.save_upload` maintains, - so owner can be verified before a document reads the source file. - 2. Direct hit at `upload_dir/file_id` (very small deployments). - 3. Fallback: `os.walk` the date-bucketed tree. 
Slow on large stores; - only allowed after the index owner check passes, or in single-user / - admin-style contexts where no owner is enforced. - - `followlinks=False` keeps a stray symlink loop in `data/uploads/` from - spinning the walker into infinite recursion. - """ - import json as _json - - if not _UPLOAD_ID_RE.fullmatch(file_id or ""): - logger.warning("Rejected invalid upload id in document lookup: %r", file_id) - return None - - meta = None - try: - idx_path = os.path.join(upload_dir, "uploads.json") - if os.path.exists(idx_path): - with open(idx_path, "r", encoding="utf-8") as f: - idx = _json.load(f) - for item in (idx.values() if isinstance(idx, dict) else []): - if isinstance(item, dict) and item.get("id") == file_id: - meta = item - break - except Exception: - meta = None - - if not _upload_owner_allowed(meta, owner, auth_manager): - logger.warning("Upload %s denied for document owner %s", file_id, owner) - return None - - if meta: - p = meta.get("path") - if p and os.path.exists(p) and _upload_path_inside(upload_dir, p): - return p - - direct = os.path.join(upload_dir, file_id) - if os.path.exists(direct) and _upload_path_inside(upload_dir, direct): - return direct - - for root, _dirs, files in os.walk(upload_dir, followlinks=False): - if file_id in files: - p = os.path.join(root, file_id) - if _upload_path_inside(upload_dir, p): - return p - return None def _derive_title(content: str) -> str: """Derive a title from document content.""" import re + if not isinstance(content, str): + return "Untitled" text = content.strip() if not text: return "Untitled" diff --git a/routes/document_routes.py b/routes/document_routes.py index 34ef30dfc..cb41108e0 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -7,41 +7,79 @@ from typing import Dict, Any, List, Optional from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Form -from sqlalchemy import func +from sqlalchemy import case, func, or_ from core.database 
import SessionLocal, Document, DocumentVersion from core.database import Session as DbSession from src.auth_helpers import get_current_user +from src.constants import MAIL_ATTACHMENTS_DIR logger = logging.getLogger(__name__) +def _get_session_or_404(db, session_id: str, user: Optional[str]): + session = db.query(DbSession).filter(DbSession.id == session_id).first() + if not session: + raise HTTPException(404, "Session not found") + if user and session.owner != user: + raise HTTPException(404, "Session not found") + return session + + +def _aggregate_language_facets(lang_rows): + """Sum document counts per display language for the library facet. + + NULL-language and explicit "text" rows share the "text" bucket (the + language filter treats them as one), so they must be ADDED. The old dict + comprehension keyed both to "text", silently overwriting one group and + undercounting the facet versus what the filter actually returns. + """ + out = {} + for lang, cnt in lang_rows: + key = lang or "text" + out[key] = out.get(key, 0) + cnt + return out + + +def _library_language_for_document(doc: Document) -> str: + """Return the display language used by the document library. + + PDF documents are stored as markdown wrappers so the editor can preserve + extracted text, form fields, and annotations. The library should still + identify them as PDFs instead of exposing that internal wrapper format. 
+ """ + from src.pdf_form_doc import find_source_upload_id + + if find_source_upload_id(doc.current_content or ""): + return "pdf" + return doc.language or "text" + from routes.document_helpers import ( DocumentCreate, DocumentUpdate, DocumentPatch, _doc_to_dict, _version_to_dict, _verify_doc_owner, _owner_session_filter, - _slug, _locate_upload, _derive_title, + _slug, _resolve_user_upload_path, _assert_pdf_marker_upload_owned, _derive_title, _PDF_RENDER_SCALE, ) -def _locate_current_user_upload(request: Request, upload_dir: str, upload_id: str, user: Optional[str]): - auth_manager = getattr(getattr(request.app, "state", None), "auth_manager", None) - return _locate_upload(upload_dir, upload_id, owner=user, auth_manager=auth_manager) - - -def _load_pdf_viewer_fitz(): - from src.pdf_runtime import load_pymupdf_for_pdf_viewer - - try: - return load_pymupdf_for_pdf_viewer() - except RuntimeError as exc: - raise HTTPException(503, str(exc)) from exc - - def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: router = APIRouter(tags=["documents"]) + def _locate_current_user_upload(request: Request, upload_id: str, user: Optional[str]): + if upload_handler is None: + return None + auth_manager = getattr(getattr(request.app, "state", None), "auth_manager", None) + return _resolve_user_upload_path(upload_handler, upload_id, user, auth_manager) + + def _load_pdf_viewer_fitz(): + from src.pdf_runtime import load_pymupdf_for_pdf_viewer + + try: + return load_pymupdf_for_pdf_viewer() + except RuntimeError as exc: + raise HTTPException(503, str(exc)) from exc + # ---- POST /api/document ---- @router.post("/api/document") async def create_document(request: Request, req: DocumentCreate) -> Dict[str, Any]: @@ -54,17 +92,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: # the doc is owner-stamped, so it lives in the library on its own. 
session = None if req.session_id: - session = db.query(DbSession).filter(DbSession.id == req.session_id).first() - if not session: - raise HTTPException(404, "Session not found") # Match the lenient ownership model the rest of the app uses # (see _owner_filter): only block when an AUTHENTICATED user is # writing into a DIFFERENT user's session. In single-user / - # unconfigured / localhost-bypass mode the middleware leaves - # current_user unset (None), and those sessions are already - # served freely everywhere else. - if user and session.owner and session.owner != user: - raise HTTPException(403, "Cannot create document in another user's session") + # unconfigured / localhost-bypass mode, falsey users preserve + # the existing lenient path. + session = _get_session_or_404(db, req.session_id, user) doc_id = str(uuid.uuid4()) ver_id = str(uuid.uuid4()) @@ -82,6 +115,8 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: if _looks_like_email_document(req.content, req.title): language = "email" + _assert_pdf_marker_upload_owned(request, req.content, user, upload_handler) + doc = Document( id=doc_id, session_id=req.session_id, @@ -136,14 +171,13 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: with a `pdf_source` marker so the viewer renders the pages without overlays. 
""" - from src.constants import UPLOAD_DIR from src.pdf_forms import has_form_fields, extract_fields from src.pdf_form_doc import ( save_field_sidecar, create_form_markdown_document, create_plain_pdf_document, ) - from src.document_processor import _process_pdf + from src.document_processor import _process_pdf, strip_pdf_content_marker import os from src.auth_helpers import require_privilege @@ -155,11 +189,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: if session_id: db = SessionLocal() try: - sess = db.query(DbSession).filter(DbSession.id == session_id).first() - if not sess: - raise HTTPException(404, "Session not found") - if user and sess.owner and sess.owner != user: - raise HTTPException(403, "Cannot import into another user's session") + _get_session_or_404(db, session_id, user) finally: db.close() @@ -176,13 +206,13 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: raise HTTPException(500, f"Upload failed: {e}") upload_id = meta["id"] - pdf_path = _locate_current_user_upload(request, UPLOAD_DIR, upload_id, user) + pdf_path = _locate_current_user_upload(request, upload_id, user) if not pdf_path: raise HTTPException(500, "Saved PDF could not be located") title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0] try: - body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip() + body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user)) except Exception: body_text = None @@ -244,19 +274,30 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: db = SessionLocal() try: from sqlalchemy import or_ + pdf_marker_cond = or_( + Document.current_content.like('% marker is found and stripped even + # when the result doesn't carry a "results" or "stdout" key. 
+ _src_text = result.get("output") or result.get("results") or result.get("stdout") or "" if block.tool_type == "web_search" and _src_text: _src_marker = "" + output += "\n\n" return {"output": output, "exit_code": 0} if tool == "web_fetch": @@ -476,10 +1053,10 @@ async def _direct_fallback( # Accept either a JSON arg ({"url": "..."}) or a plain URL/domain. if raw.startswith("{"): try: - parsed = _json.loads(raw) + parsed = json.loads(raw) if isinstance(parsed, dict): url = str(parsed.get("url") or "").strip() - except _json.JSONDecodeError: + except json.JSONDecodeError: url = "" if not url: # Non-JSON (or JSON without a usable url): take the first line @@ -502,6 +1079,11 @@ async def _direct_fallback( ) except asyncio.TimeoutError: return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1} + except Exception as e: + # Direct URL fetches can hit bot protection / auth walls + # (e.g. eBay 403). Treat that as a tool failure the model can + # reason around, not an uncaught chat-stream 500. + return {"error": f"web_fetch: {url}: {e}", "exit_code": 1} err = result.get("error") text = (result.get("content") or "").strip() title = result.get("title") or "" @@ -536,8 +1118,10 @@ async def execute_tool_block( block: Any, session_id: Optional[str] = None, disabled_tools: Optional[set] = None, + tool_policy: Optional[ToolPolicy] = None, owner: Optional[str] = None, progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None, + workspace: Optional[str] = None, ) -> Tuple[str, Dict]: """Execute a single tool block. Returns (description, result_dict). 
@@ -553,6 +1137,7 @@ async def execute_tool_block( do_manage_documents, do_manage_settings, do_manage_notes, do_manage_calendar, do_download_model, do_serve_model, do_list_served_models, do_stop_served_model, + do_tail_serve_output, do_list_downloads, do_cancel_download, do_search_hf_models, do_list_cached_models, do_list_serve_presets, do_serve_preset, do_adopt_served_model, do_list_cookbook_servers, @@ -570,8 +1155,7 @@ async def execute_tool_block( # Return a helpful error so the model retries with the correct format. if tool in ("python", "json", "xml") and content.strip().startswith("{") and content.strip().endswith("}"): try: - import json as _json - parsed = _json.loads(content.strip()) + parsed = json.loads(content.strip()) if isinstance(parsed, dict): desc = f"{tool}: misformatted tool call" result = { @@ -593,6 +1177,12 @@ async def execute_tool_block( pass # Reject tools that the user has disabled for this request + if tool_policy and tool_policy.blocks(tool): + desc = f"{tool}: BLOCKED" + result = {"error": tool_policy.reason_for(tool), "exit_code": 1} + logger.info("Tool blocked by policy: %s", tool) + return desc, result + if disabled_tools and tool in disabled_tools: desc = f"{tool}: BLOCKED" result = {"error": f"Tool '{tool}' is disabled by user.", "exit_code": 1} @@ -617,6 +1207,87 @@ async def execute_tool_block( logger.warning("Public tool policy blocked owner=%r tool=%s", owner, tool) return desc, result + # ask_user: the agent poses a multiple-choice question to the user to get a + # decision/clarification. This is a pure UI-control marker — no subprocess, + # no filesystem. It returns an `ask_user` payload that the agent loop turns + # into an `ask_user` SSE event and then ENDS the turn, so the chat waits for + # the user's selection (their choice arrives as the next message). 
+ if tool == "ask_user": + question, options, multi = "", [], False + raw = (content or "").strip() + try: + parsed = json.loads(raw) if raw else {} + except (ValueError, TypeError): + parsed = {} + if isinstance(parsed, dict): + question = str(parsed.get("question", "")).strip() + multi = bool(parsed.get("multi") or parsed.get("multiSelect")) + for opt in (parsed.get("options") or []): + if isinstance(opt, dict): + label = str(opt.get("label", "")).strip() + descr = str(opt.get("description", "")).strip() + elif isinstance(opt, str): + label, descr = opt.strip(), "" + else: + continue + if label: + options.append({"label": label, "description": descr}) + else: + question = raw + if not question or len(options) < 2: + return "ask_user: invalid", { + "error": ( + "ask_user needs a non-empty `question` and at least 2 `options` " + "(each an object with a `label`, optional `description`)." + ), + "exit_code": 1, + } + options = options[:6] # keep the choice list sane + desc = f"ask_user: {question[:80]}" + labels = ", ".join(o["label"] for o in options) + result = { + "ask_user": {"question": question, "options": options, "multi": multi}, + "output": f"Asked the user: {question}\nOptions: {labels}\nAwaiting their selection.", + "exit_code": 0, + } + logger.info("Tool executed: %s (%d options, multi=%s)", desc, len(options), multi) + return desc, result + + # update_plan: the agent writes back to the active plan — tick an item done + # or revise steps (e.g. when the user asks to change something). Pure UI + # marker: returns a `plan_update` payload the agent loop turns into a + # `plan_update` SSE event; the frontend replaces the stored plan and refreshes + # the docked plan window. Does NOT end the turn. 
+ if tool == "update_plan": + import json as _json + raw = (content or "").strip() + plan = "" + try: + parsed = _json.loads(raw) if raw else {} + except (ValueError, TypeError): + parsed = {} + if isinstance(parsed, dict) and parsed.get("plan"): + plan = str(parsed.get("plan", "")).strip() + else: + # Plain-string call (raw checklist) or JSON without a usable `plan`. + plan = raw + if not plan: + return "update_plan: invalid", { + "error": "update_plan needs a non-empty `plan` (the full updated checklist as markdown).", + "exit_code": 1, + } + plan = plan[:8192] + done = plan.count("- [x]") + plan.count("- [X]") + total = done + plan.count("- [ ]") + desc = f"update_plan: {done}/{total} done" if total else "update_plan" + result = { + "plan_update": {"plan": plan}, + "output": f"Plan updated ({done}/{total} steps complete)." if total else "Plan updated.", + "exit_code": 0, + } + logger.info("Tool executed: %s", desc) + return desc, result + # Background execution: a `bash` block whose first line is the `#!bg` # marker runs DETACHED — returns a job id immediately so the chat stream # isn't held open for a multi-minute install/ffmpeg/download. The always-on @@ -625,7 +1296,7 @@ async def execute_tool_block( _is_bg, _bg_cmd = _split_bg_marker(content) if _is_bg and _bg_cmd: from src import bg_jobs - rec = bg_jobs.launch(_bg_cmd, session_id=session_id) + rec = bg_jobs.launch(_bg_cmd, session_id=session_id, cwd=workspace or _AGENT_WORKDIR) short = _bg_cmd.strip().split(chr(10))[0][:80] desc = f"bash (background): {short}" result = { @@ -647,19 +1318,26 @@ async def execute_tool_block( if tool in _MCP_TOOL_MAP: first_line = content.split(chr(10))[0][:80] desc = f"{tool}: {first_line}" - result = await _call_mcp_tool(tool, content, progress_cb=progress_cb) + result = await _call_mcp_tool(tool, content, progress_cb=progress_cb, workspace=workspace) + elif tool in ("grep", "glob", "ls"): + # Code-navigation tools — no MCP server; run the direct implementation. 
+ # Confined to the workspace when one is set (same policy as read_file). + first_line = content.split(chr(10))[0][:80] + desc = f"{tool}: {first_line}" + result = await _direct_fallback(tool, content, progress_cb=progress_cb, workspace=workspace) \ + or {"error": f"{tool}: execution failed", "exit_code": 1} elif tool == "create_document": title = content.split("\n")[0].strip()[:60] desc = f"create_document: {title}" - result = await do_create_document(content, session_id=session_id) + result = await do_create_document(content, session_id=session_id, owner=owner) elif tool == "update_document": desc = f"update_document: {content.split(chr(10))[0][:60]}" - result = await do_update_document(content) + result = await do_update_document(content, owner=owner) elif tool == "edit_document": - result = await do_edit_document(content) + result = await do_edit_document(content, owner=owner) desc = f"edit_document: {result.get('title', '')}" elif tool == "suggest_document": - result = await do_suggest_document(content) + result = await do_suggest_document(content, owner=owner) desc = f"suggest_document: {result.get('count', 0)} suggestions" elif tool == "search_chats": query = content.split("\n")[0].strip() @@ -717,6 +1395,9 @@ async def execute_tool_block( elif tool == "stop_served_model": desc = "stop_served_model" result = await do_stop_served_model(content, owner=owner) + elif tool == "tail_serve_output": + desc = "tail_serve_output" + result = await do_tail_serve_output(content, owner=owner) elif tool == "list_downloads": desc = "list_downloads" result = await do_list_downloads(content, owner=owner) @@ -747,6 +1428,9 @@ async def execute_tool_block( elif tool == "edit_image": desc = "edit_image" result = await do_edit_image(content, owner=owner) + elif tool == "edit_file": + result = await _do_edit_file(content, workspace=workspace) + desc = result.get("output") or result.get("error") or "edit_file" elif tool == "trigger_research": desc = "trigger_research" result = 
await do_trigger_research(content, owner=owner) @@ -783,7 +1467,7 @@ async def execute_tool_block( result = {"error": "MCP manager not available", "exit_code": 1} else: desc = f"unknown: {tool}" - result = {"error": f"Unknown tool type: {tool}"} + result = {"error": f"Unknown tool type: {tool}", "exit_code": 1} logger.info(f"Tool executed: {desc} -> exit_code={result.get('exit_code', 'n/a')}") return desc, result diff --git a/src/tool_implementations.py b/src/tool_implementations.py index 5871deaff..548f6f0f5 100644 --- a/src/tool_implementations.py +++ b/src/tool_implementations.py @@ -5,25 +5,16 @@ Extracted tool implementation functions (do_* and helpers) from agent_tools.py. These handle the actual execution logic for each tool type. """ +import asyncio import json import logging import os import re from typing import Any, Dict, List, Optional -MAX_OUTPUT_CHARS = 10_000 -MAX_READ_CHARS = 20_000 - - -def get_mcp_manager(): - from src import agent_tools - return agent_tools.get_mcp_manager() - - -def _truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: - if len(text) > limit: - return text[:limit] + f"\n... (truncated, {len(text)} chars total)" - return text +from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE +from src.tool_utils import get_mcp_manager +from core.constants import internal_api_base logger = logging.getLogger(__name__) @@ -88,6 +79,50 @@ def get_active_document(): return _active_document_id +def clear_active_document(doc_id: Optional[str] = None) -> bool: + """Clear the in-memory active-document pointer. + + With ``doc_id`` given, only clears when it matches the current pointer, so a + different active document is left untouched. Returns True if it was cleared. 
+ + Called when a document is detached from its session or deleted (its tab is + closed): without this, the stale pointer makes the last-resort doc-injection + path re-surface a closed document in a later, unrelated chat — even one whose + session no longer matches — because an unlinked doc has session_id NULL (#1160). + """ + global _active_document_id + if doc_id is None or _active_document_id == doc_id: + _active_document_id = None + return True + return False + + +def _owned_document_query(query, Document, owner: Optional[str]): + if owner is None: + # A bare Python `False` is not a valid SQL expression — SQLAlchemy 1.4 + # deprecates it and 2.0 raises ArgumentError. Use the SQL `false()` + # literal to return zero rows for an unscoped (owner-less) query. + from sqlalchemy import false + return query.filter(false()) + return query.filter(Document.owner == owner) + + +def _get_owned_document(db, Document, doc_id: str, owner: Optional[str], active_only: bool = False): + q = db.query(Document).filter(Document.id == doc_id) + if active_only: + q = q.filter(Document.is_active == True) + q = _owned_document_query(q, Document, owner) + return q.first() + + +def _most_recent_owned_document(db, Document, owner: Optional[str], active_only: bool = False): + q = db.query(Document) + if active_only: + q = q.filter(Document.is_active == True) + q = _owned_document_query(q, Document, owner) + return q.order_by(Document.updated_at.desc()).first() + + # --------------------------------------------------------------------------- # Document tools — create/update/edit/suggest living documents # --------------------------------------------------------------------------- @@ -171,7 +206,7 @@ def _coerce_email_document_content(existing: str, incoming: str) -> str: return header.rstrip() + "\n---\n" + body -async def do_create_document(content_block: str, session_id: Optional[str] = None) -> Dict: +async def do_create_document(content_block: str, session_id: Optional[str] = None, 
owner: Optional[str] = None) -> Dict: """Create a new document. Supports two formats: 1) Line-based: line 1 = title, line 2 (optional) = language, rest = content 2) XML-like tags: ......... @@ -240,6 +275,8 @@ async def do_create_document(content_block: str, session_id: Optional[str] = Non # Inherit ownership from the chat session so the doc survives that # session later being deleted (session_id → NULL). _sess = db.query(DbSession).filter(DbSession.id == session_id).first() + if owner is not None and (not _sess or _sess.owner != owner): + return {"error": "Cannot create document in another user's session"} _owner = _sess.owner if _sess else None doc = Document( @@ -286,7 +323,7 @@ async def do_create_document(content_block: str, session_id: Optional[str] = Non db.close() -async def do_update_document(content: str, doc_id: Optional[str] = None) -> Dict: +async def do_update_document(content: str, doc_id: Optional[str] = None, owner: Optional[str] = None) -> Dict: """Update an existing document. 
Content = full new document text.""" import uuid from src.database import SessionLocal, Document, DocumentVersion @@ -297,9 +334,9 @@ async def do_update_document(content: str, doc_id: Optional[str] = None) -> Dict try: doc = None if target_id: - doc = db.query(Document).filter(Document.id == target_id).first() + doc = _get_owned_document(db, Document, target_id, owner) if not doc: - doc = db.query(Document).order_by(Document.updated_at.desc()).first() + doc = _most_recent_owned_document(db, Document, owner) if doc: target_id = doc.id set_active_document(target_id) @@ -350,7 +387,7 @@ def parse_edit_blocks(content: str) -> list: return edits -async def do_edit_document(content: str, doc_id: Optional[str] = None) -> Dict: +async def do_edit_document(content: str, doc_id: Optional[str] = None, owner: Optional[str] = None) -> Dict: """Apply targeted FIND/REPLACE edits to an existing document.""" import uuid from src.database import SessionLocal, Document, DocumentVersion @@ -365,11 +402,11 @@ async def do_edit_document(content: str, doc_id: Optional[str] = None) -> Dict: try: doc = None if target_id: - doc = db.query(Document).filter(Document.id == target_id).first() + doc = _get_owned_document(db, Document, target_id, owner) if not doc: # Fallback: most recently updated document. Avoids "no active doc" errors # after server restart or when the agent loses track of which doc to edit. 
- doc = db.query(Document).order_by(Document.updated_at.desc()).first() + doc = _most_recent_owned_document(db, Document, owner) if doc: target_id = doc.id set_active_document(target_id) @@ -458,7 +495,7 @@ def parse_suggest_blocks(content: str) -> list: return suggestions -async def do_suggest_document(content: str, doc_id: str = None) -> Dict: +async def do_suggest_document(content: str, doc_id: str = None, owner: Optional[str] = None) -> Dict: """Create inline suggestions for the active document WITHOUT modifying it.""" from src.database import SessionLocal, Document @@ -472,7 +509,7 @@ async def do_suggest_document(content: str, doc_id: str = None) -> Dict: db = SessionLocal() try: - doc = db.query(Document).filter(Document.id == target_id).first() + doc = _get_owned_document(db, Document, target_id, owner) if not doc: return {"error": f"Document {target_id} not found"} @@ -502,7 +539,7 @@ async def do_suggest_document(content: str, doc_id: str = None) -> Dict: # --------------------------------------------------------------------------- async def do_search_chats(query: str, limit: int = 20, owner: str | None = None) -> Dict: - """Search past chat messages for the calling user's sessions only. + """Search past session transcripts for the calling user's sessions only. Without an owner filter this used to leak EVERY user's chat history into the agent's `search_chats` results (v2 review HIGH-11). The @@ -510,63 +547,36 @@ async def do_search_chats(query: str, limit: int = 20, owner: str | None = None) through; legacy callers without owner pass through as before but will only see legacy/null-owner rows. """ - from src.database import SessionLocal, ChatMessage as DBChatMessage, Session as DBSession - # Escape LIKE wildcards in the user-supplied query so a stray % or _ - # doesn't widen the match (and to keep the response deterministic). 
- safe_q = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") - db = SessionLocal() try: - q = ( - db.query(DBChatMessage, DBSession.id, DBSession.name) - .join(DBSession, DBChatMessage.session_id == DBSession.id) - .filter( - DBSession.archived == False, - DBChatMessage.content.ilike(f"%{safe_q}%", escape="\\"), - DBChatMessage.role.in_(["user", "assistant"]), - ) - ) - if owner is not None: - # Restrict to this user's sessions plus legacy null-owner - # rows (so single-user upgrades keep seeing their own data). - q = q.filter((DBSession.owner == owner) | (DBSession.owner.is_(None))) - rows = q.order_by(DBChatMessage.timestamp.desc()).limit(limit).all() + from src.session_search import search_session_messages - if not rows: + results = search_session_messages(query, limit=limit, owner=owner) + if not results: return {"results": f"No chats found matching \"{query}\"."} # Group by session to avoid duplicate links seen_sessions = {} - for msg, session_id, session_name in rows: - if session_id not in seen_sessions: - content = msg.content or "" - lower_content = content.lower() - idx = lower_content.find(query.lower()) - if idx == -1: - snippet = content[:150] - else: - start = max(0, idx - 60) - end = min(len(content), idx + len(query) + 60) - snippet = ("..." if start > 0 else "") + content[start:end] + ("..." 
if end < len(content) else "") - seen_sessions[session_id] = { - "name": session_name or "Untitled", - "snippet": snippet, - "role": msg.role, - "timestamp": msg.timestamp.isoformat() if msg.timestamp else None, - } + for result in results: + if result.session_id not in seen_sessions: + seen_sessions[result.session_id] = result lines = [f"Found {len(seen_sessions)} session(s) matching \"{query}\":\n"] - for sid, info in seen_sessions.items(): - lines.append(f"- **{info['name']}** (#{sid})") + for sid, result in seen_sessions.items(): + lines.append(f"- **{result.session_name}** (#{sid})") lines.append(f" Link: [Open chat](#{sid})") - lines.append(f" > {info['snippet']}") + lines.append(f" Match ({result.role}): {result.content_snippet}") + if result.context_before: + before = result.context_before[-1] + lines.append(f" Before ({before['role']}): {before['content'][:180]}") + if result.context_after: + after = result.context_after[0] + lines.append(f" After ({after['role']}): {after['content'][:180]}") lines.append("") return {"results": "\n".join(lines)} except Exception as e: logger.error(f"search_chats failed: {e}") return {"error": str(e), "exit_code": 1} - finally: - db.close() # --------------------------------------------------------------------------- @@ -627,7 +637,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: if action == "view": if not name: return {"error": "name is required for view", "exit_code": 1} - md = sm.read_skill_md(name) + md = sm.read_skill_md(name, owner=owner) if md is None: return {"error": f"Skill {name!r} not found", "exit_code": 1} return {"results": md} @@ -638,7 +648,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: ref = (args.get("path") or "").strip() if not ref: return {"error": "path is required for view_ref", "exit_code": 1} - text = sm.read_skill_reference(name, ref) + text = sm.read_skill_reference(name, ref, owner=owner) if text is None: return {"error": 
f"Reference {ref!r} not found under {name!r}", "exit_code": 1} return {"results": text} @@ -713,7 +723,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: return {"error": f"Skill {name!r} not found", "exit_code": 1} if not sk_new.owner: sk_new.owner = match.get("owner") or owner - ok = sm.update_skill(name, _skill_dump(sk_new)) + ok = sm.update_skill(name, _skill_dump(sk_new), owner=owner) return {"results": f"Edited skill `{sk_new.name}`."} if ok else {"error": "Update failed", "exit_code": 1} if action == "patch": @@ -723,7 +733,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: new_str = args.get("new_string", "") if not isinstance(old, str) or not old: return {"error": "old_string is required and must be non-empty", "exit_code": 1} - md = sm.read_skill_md(name) + md = sm.read_skill_md(name, owner=owner) if md is None: return {"error": f"Skill {name!r} not found", "exit_code": 1} count = md.count(old) @@ -737,7 +747,7 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: except Exception as e: return {"error": f"Patched content is not valid SKILL.md: {e}", "exit_code": 1} sk_new.name = slugify(sk_new.name or name) - ok = sm.update_skill(name, _skill_dump(sk_new)) + ok = sm.update_skill(name, _skill_dump(sk_new), owner=owner) return {"results": f"Patched skill `{sk_new.name}`."} if ok else {"error": "Patch update failed", "exit_code": 1} if action == "publish": @@ -750,13 +760,13 @@ async def do_manage_skills(content: str, owner: Optional[str] = None) -> Dict: updates = {"status": "published"} if args.get("confidence") is not None: updates["confidence"] = max(0.0, min(1.0, float(args["confidence"]))) - sm.update_skill(name, updates) + sm.update_skill(name, updates, owner=owner) return {"results": f"✅ Published `{name}`. 
It now appears in the skills index for future turns."} if action == "delete": if not name: return {"error": "name is required for delete", "exit_code": 1} - ok = sm.delete_skill(name) + ok = sm.delete_skill(name, owner=owner) return {"results": f"Deleted skill `{name}`."} if ok else {"error": f"Skill {name!r} not found", "exit_code": 1} if action == "search": @@ -864,7 +874,9 @@ async def do_manage_tasks(content: str, owner: Optional[str] = None) -> Dict: ) task_id = str(_uuid.uuid4()) - name = args.get("name") or args.get("prompt", args.get("action_name", "Task"))[:50] + # Guard each fallback with `or`: args.get("prompt", default) returns + # None when the key is present but null, and None[:50] raises. + name = args.get("name") or (args.get("prompt") or args.get("action_name") or "Task")[:50] task = ScheduledTask( id=task_id, @@ -1167,7 +1179,17 @@ async def do_manage_mcp(content: str, owner: Optional[str] = None) -> Dict: try: srv = db2.query(McpServer).filter(McpServer.id == sid).first() if srv: - await mcp.connect_server(sid) + _args = json.loads(srv.args) if srv.args else [] + _env = json.loads(srv.env) if srv.env else {} + await mcp.connect_server( + server_id=sid, + name=srv.name, + transport=srv.transport, + command=srv.command, + args=_args, + env=_env, + url=srv.url, + ) st = mcp.get_server_status(sid) return {"response": f"Reconnected '{srv.name}' ({st.get('tool_count', 0)} tools)", "exit_code": 0} return {"error": f"Server {sid} not found", "exit_code": 1} @@ -1368,6 +1390,7 @@ async def do_manage_documents(content: str, owner: Optional[str] = None) -> Dict try: if action == "list": q = db.query(Document).filter(Document.is_active == True) + q = _owned_document_query(q, Document, owner) if args.get("search"): q = q.filter(Document.title.ilike(f"%{args['search']}%")) if args.get("language"): @@ -1398,7 +1421,7 @@ async def do_manage_documents(content: str, owner: Optional[str] = None) -> Dict doc_id = args.get("document_id") or args.get("id") or 
args.get("uid") if not doc_id: return {"error": "Need document_id (use action=list to find one)", "exit_code": 1} - doc = db.query(Document).filter(Document.id == doc_id, Document.is_active == True).first() + doc = _get_owned_document(db, Document, doc_id, owner, active_only=True) if not doc: return {"error": f"Document '{doc_id}' not found", "exit_code": 1} body = doc.current_content or "" @@ -1423,10 +1446,10 @@ async def do_manage_documents(content: str, owner: Optional[str] = None) -> Dict doc_id = args.get("document_id") or args.get("id") or args.get("uid") or _active_document_id doc = None if doc_id: - doc = db.query(Document).filter(Document.id == doc_id).first() + doc = _get_owned_document(db, Document, doc_id, owner) if not doc: # Fallback: most recently updated doc (likely what the user means) - doc = db.query(Document).filter(Document.is_active == True).order_by(Document.updated_at.desc()).first() + doc = _most_recent_owned_document(db, Document, owner, active_only=True) if not doc: return {"error": "No document to delete", "exit_code": 1} title = doc.title @@ -1478,7 +1501,14 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict: "tavily_api_key", "serper_api_key", "app_public_url", } def _is_secret(k): - return k in _SECRET_KEYS or any(t in k for t in ("api_key", "_key", "token", "secret", "password")) + # `token` must be a suffix, not a substring: otherwise the int + # setting `agent_input_token_budget` (which even has a "token budget" + # alias to set it from chat) is wrongly classified as a credential. + return ( + k in _SECRET_KEYS + or k.endswith("token") + or any(t in k for t in ("api_key", "_key", "secret", "password")) + ) # Friendly aliases → real keys, so natural phrasing resolves. 
_ALIASES_SET = { @@ -1499,9 +1529,14 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict: "image gen": "image_gen_enabled", "image generation": "image_gen_enabled", "reminder channel": "reminder_channel", "reminders": "reminder_channel", "ntfy topic": "reminder_ntfy_topic", + "webhook integration": "reminder_webhook_integration_id", + "webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template", "agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls", "agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds", - "token budget": "agent_input_token_budget", + "token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget", + "hard max": "agent_input_token_hard_max", + "token budget cap": "agent_input_token_hard_max", + "input budget cap": "agent_input_token_hard_max", } def _resolve(k): k2 = (k or "").strip().lower() @@ -1511,7 +1546,7 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict: _ENUMS = { "image_quality": ["low", "medium", "high"], - "reminder_channel": ["browser", "email", "ntfy"], + "reminder_channel": ["browser", "email", "ntfy", "webhook"], } def _coerce(value, default): if isinstance(default, bool): @@ -1784,6 +1819,22 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: text = re.sub(r"^\s*reminder\s*:\s*", "", text) return re.sub(r"\s+", " ", text) + def _note_visible_to_owner(note, owner_value: Optional[str]) -> bool: + # Empty owner_value is single-user / auth-disabled mode. A real + # authenticated owner must match exactly; null/empty legacy rows are not + # shared between accounts. 
+ if not owner_value: + return True + return getattr(note, "owner", None) == owner_value + + def _note_by_prefix(note_id: str): + if not note_id: + return None + q = db.query(Note).filter(Note.id.startswith(note_id)) + if owner: + q = q.filter(Note.owner == owner) + return q.first() + try: if action == "list": q = db.query(Note) @@ -1828,7 +1879,13 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: title = text_raw.strip() elif not content_raw and text_raw: content_raw = text_raw - items_raw = args.get("items") + # Accept both `items` (legacy/internal field) and `checklist_items` + # (the schema-exposed name used by native function calls). Models + # following the schema emit `checklist_items`; older code paths + # and direct API callers still use `items`. + items_raw = args.get("checklist_items") + if items_raw is None: + items_raw = args.get("items") items_json = json.dumps(items_raw) if items_raw is not None else None note_type = args.get("note_type", "checklist" if items_raw else "note") # Accept natural-language due_date ("tomorrow at 1pm") in @@ -1881,20 +1938,48 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: ) db.add(note) db.commit() - return {"response": f"Note created: \"{title or '(untitled)'}\" (id: {note.id[:8]})", "exit_code": 0} + # Return note_id so the chat-side renderer can build a real + # "View note" button that opens the notes modal at this id. + # Previously the create response only included a prose + # confirmation; the model would type "View note" as a markdown + # link with no target, leaving the user with a click that + # did nothing and uncertainty about whether the note was made. 
+ return { + "response": f"Note created: \"{title or '(untitled)'}\" (id: {note.id[:8]})", + "note_id": note.id, + "note_title": title or "", + "open_url": f"/#open=notes&note={note.id}", + "exit_code": 0, + } elif action == "update": note_id = args.get("id", "") - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not _note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} - for field in ("title", "content", "note_type", "color", "label", "due_date"): + for field in ("title", "content", "note_type", "color", "label"): if field in args and args[field] is not None: setattr(note, field, args[field]) - if "items" in args and args["items"] is not None: - note.items = json.dumps(args["items"]) + # Parse due_date the same way the `add` action does. The schema + # advertises natural language ("tomorrow at 9am"), and naive ISO + # strings need the user's tz offset attached so the frontend's + # `new Date()` resolves the right absolute moment. Storing the raw + # value here left updated reminders as unparseable literals that + # never fired.
+ if args.get("due_date") is not None: + due_raw = args["due_date"] + try: + from routes.calendar_routes import parse_due_for_user as _pdt_user + note.due_date = _pdt_user(due_raw) + except Exception: + note.due_date = due_raw # fall through; trust the model + new_items = args.get("checklist_items") + if new_items is None: + new_items = args.get("items") + if new_items is not None: + note.items = json.dumps(new_items) flag_modified(note, "items") if "pinned" in args: note.pinned = args["pinned"] @@ -1905,10 +1990,10 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: elif action == "delete": note_id = args.get("id", "") - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not _note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} title = note.title db.delete(note) @@ -1918,10 +2003,10 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: elif action == "toggle_item": note_id = args.get("id", "") index = args.get("index", 0) - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not _note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} if not note.items: return {"error": "Note has no checklist items", "exit_code": 1} @@ -2033,6 +2118,13 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: """Parse agent event datetimes in the user's timezone when available.""" return _parse_dt_pair(parse_due_for_user(raw)) + def _first_nonempty_arg(*names: str): + for name in names: + value = args.get(name) + if 
value not in (None, ""): + return value + return None + def _create_calendar_reminder(summary: str, location: str, dtstart: datetime, all_day: bool, minutes_before: int, is_utc: bool = False) -> tuple[Optional[str], Optional[str]]: @@ -2090,12 +2182,18 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: elif action == "list_events": try: - if args.get("start"): - start_dt = _parse_dt(args["start"]) + start_raw = _first_nonempty_arg( + "start", "start_date", "range_start", "from", "dtstart", "since" + ) + end_raw = _first_nonempty_arg( + "end", "end_date", "range_end", "to", "dtend", "until" + ) + if start_raw: + start_dt = _parse_dt(start_raw) else: start_dt = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) - if args.get("end"): - end_dt = _parse_dt(args["end"]) + if end_raw: + end_dt = _parse_dt(end_raw) else: end_dt = start_dt + timedelta(days=14) except ValueError as e: @@ -2331,9 +2429,17 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: if args.get("location") is not None: ev.location = args["location"] if args.get("dtstart") is not None: - ev.dtstart = _parse_dt(args["dtstart"]) + # Anchor naive/natural-language input to the USER's timezone and + # refresh is_utc, exactly like create_event. Parsing with the + # raw server-local _parse_dt here (and never touching is_utc) + # silently shifted an updated event by the user's UTC offset. + _eff_all_day = ( + args["all_day"] if args.get("all_day") is not None else ev.all_day + ) + ev.dtstart, _su = _parse_event_dt(args["dtstart"]) + ev.is_utc = bool(_su and not _eff_all_day) if args.get("dtend") is not None: - ev.dtend = _parse_dt(args["dtend"]) + ev.dtend, _eu = _parse_event_dt(args["dtend"]) if args.get("all_day") is not None: ev.all_day = args["all_day"] # Tag/category + importance updates (any of these aliases). 
@@ -2377,10 +2483,12 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: # ── Cookbook tools ── -# Cookbook routes loopback. The agent's tool calls run in-process but -# need to reach admin-gated cookbook routes; we ride the per-process -# internal token so require_admin lets us through. See core/middleware.py. -_COOKBOOK_BASE = "http://localhost:7000" +# In-process loopback base for agent tools that call Odysseus's own API +# (cookbook state, model serve, gallery, email, calendar). We ride the +# per-process internal token so require_admin lets us through. See +# core/middleware.py. Resolution (override / APP_PORT / 7000) lives in +# core.constants.internal_api_base(). +_INTERNAL_BASE = internal_api_base() def _internal_headers(owner: Optional[str] = None) -> Dict[str, str]: @@ -2399,7 +2507,7 @@ async def _cookbook_servers() -> Dict[str, Any]: import httpx try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=_internal_headers()) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception: return {"default_host": "", "hosts": []} @@ -2465,7 +2573,7 @@ async def _cookbook_env_for_host(host: str) -> Dict[str, Any]: state: Dict[str, Any] = {} try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: logger.debug(f"cookbook env lookup failed for host={host!r}: {e}") @@ -2504,6 +2612,8 @@ async def _cookbook_env_for_host(host: str) -> Dict[str, Any]: return { "env_prefix": env_prefix, + "env_type": env_kind, + "env_path": env_path, "gpus": 
env_root.get("gpus") or "", "platform": platform, "hf_token": env_root.get("hfToken") or "", @@ -2523,7 +2633,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, headers = _internal_headers() try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: logger.debug(f"cookbook state read failed: {e}") @@ -2545,7 +2655,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, placeholder = ( f"Launched via agent — waiting for tmux output…\n" f" session: {session_id}\n" - f" target: {target}{cmd.split()[0] if cmd else ''}\n" + f" target: {target}{(cmd.split() or [''])[0] if cmd else ''}\n" f" cmd: {cmd[:200]}{'…' if len(cmd) > 200 else ''}" ) tasks.append({ @@ -2567,7 +2677,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, state["tasks"] = tasks try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + r = await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, headers=headers) return r.status_code < 400 except Exception as e: @@ -2576,26 +2686,32 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, # Paths the generic `app_api` tool will refuse to call. Auth/token/user -# administration is too risky to route through an agent surface even -# when the agent is admin-context — accidental "delete account" -# style mistakes have permanent blast radius. +# administration and host shell execution are too risky to route through an +# agent surface even when the agent is admin-context; accidental account or +# command mistakes have permanent blast radius. 
_APP_API_BLOCKLIST_PREFIXES = ( - "/api/auth/", # login/logout/password - "/api/users/", # user CRUD - "/api/tokens/", # api token mgmt - "/api/admin/", # admin one-shots (wipe etc.) + "/api/auth", # login/logout/password + "/api/users", # user CRUD (bare /api/users list+create+delete must also block) + "/api/tokens", # api token mgmt (bare /api/tokens list+create must also block) + "/api/admin", # admin one-shots (wipe etc.) + "/api/shell", # host shell execution must stay behind named command tooling "/api/backup/restore", # destructive restore ) # (method, prefix) pairs to refuse specifically. Used for endpoints -# where GET is fine but writes are destructive — saw the agent wipe -# cookbook_state.json (presets + tasks) by POSTing {"tasks": []} to -# /api/cookbook/state, which overwrote the whole file. Use the -# dedicated preset/task tools instead. +# where GET is fine but writes are destructive or host-control shaped. +# Saw the agent wipe cookbook_state.json (presets + tasks) by POSTing +# {"tasks": []} to /api/cookbook/state, which overwrote the whole file. +# Use dedicated tools or UI flows instead. _APP_API_BLOCKLIST_METHOD_PATH = ( ("GET", "/api/email/accounts"), # owner-filtered in tool context; use list_email_accounts MCP tool ("POST", "/api/cookbook/state"), # whole-file overwrite — agent must use serve_preset/serve_model instead ("DELETE", "/api/cookbook/state"), + # Host-control routes: package install, engine rebuild, and process + # signalling should not be reachable through the generic API bridge. + ("POST", "/api/cookbook/packages/install"), + ("POST", "/api/cookbook/rebuild-engine"), + ("POST", "/api/cookbook/kill-pid"), # Use the named tools (download_model / serve_model) — they handle # host-name resolution, per-host env_prefix, AND register the task # in cookbook state so it shows in the UI + list_downloads. 
Hitting @@ -2620,7 +2736,7 @@ _APP_API_BLOCKLIST_METHOD_PATH = ( async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: - """Generic loopback to any internal Odysseus API endpoint. Lets the + """Generic loopback to allowed internal Odysseus API endpoints. Lets the agent reach the full UI-button surface (cookbook, email, notes, calendar, skills, sessions, gallery, research, etc.) without us landing a named tool wrapper for every one. @@ -2634,7 +2750,8 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: The `endpoints` action returns the OpenAPI surface (method + path + summary) so the agent can discover what's reachable. A blocklist - refuses auth/user/admin paths to keep blast radius bounded. + refuses sensitive auth/user/admin/shell paths and method-specific + host-control routes to keep blast radius bounded. """ import httpx try: @@ -2643,7 +2760,7 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: return {"error": "Invalid JSON arguments", "exit_code": 1} action = (args.get("action") or "call").lower() - base = _COOKBOOK_BASE + base = _INTERNAL_BASE if action == "endpoints": # Fetch FastAPI's OpenAPI schema so the agent can discover any @@ -2694,7 +2811,7 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: if not path.startswith("/"): path = "/" + path if any(path.startswith(p) for p in _APP_API_BLOCKLIST_PREFIXES): - return {"error": f"Path blocked for safety: {path}. Auth/user/admin endpoints are off-limits via app_api.", "exit_code": 1} + return {"error": f"Path blocked for safety: {path}. 
Sensitive endpoints are off-limits via app_api.", "exit_code": 1} method = (args.get("method") or "GET").upper() if method not in ("GET", "POST", "PUT", "PATCH", "DELETE"): @@ -2702,6 +2819,12 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: if any(method == m and path.startswith(p) for m, p in _APP_API_BLOCKLIST_METHOD_PATH): if "/api/email/accounts" in path: return {"error": "Don't use /api/email/accounts via app_api — it is owner-filtered in tool context and may return empty. Use the `list_email_accounts` email tool, then pass `account` to list_emails/read_email.", "exit_code": 1} + if "/api/cookbook/packages/install" in path: + return {"error": "Don't POST /api/cookbook/packages/install via app_api — package installation is host code execution. Use the dedicated Cookbook dependency UI/flow instead.", "exit_code": 1} + if "/api/cookbook/rebuild-engine" in path: + return {"error": "Don't POST /api/cookbook/rebuild-engine via app_api — engine rebuild mutates local or remote host state. Use the dedicated Cookbook UI/flow instead.", "exit_code": 1} + if "/api/cookbook/kill-pid" in path: + return {"error": "Don't POST /api/cookbook/kill-pid via app_api — process signalling is host control. 
Use the dedicated Cookbook stop/diagnostic flow instead.", "exit_code": 1} if "/api/model/download" in path: return {"error": "Don't POST /api/model/download directly — use the `download_model` tool (it resolves the server name, sets the venv env_prefix, and registers the task so it shows in the UI).", "exit_code": 1} if "/api/model/serve" in path: @@ -2898,7 +3021,7 @@ async def do_download_model(content: str, owner: Optional[str] = None) -> Dict: if env_cfg.get("ssh_port"): payload["ssh_port"] = env_cfg["ssh_port"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/download", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/download", json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -2942,6 +3065,31 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict: # the UI uses. Without env_prefix, `vllm serve …` lands in a shell # without the user's venv and fails 'command not found'. env_cfg = await _cookbook_env_for_host(host) + # Rewrite bare `vllm` / `python3` leading tokens to the venv's absolute + # binary path when the target host has a venv configured. SSH non- + # interactive shells often leave ~/.local/bin ahead of the venv bin on + # PATH even with the venv activated, so `vllm serve` finds the wrong + # binary and crashes early (e.g. compute_89 torch ABI errors on an old + # user-site torch). This mirrors what static/js/cookbook.js does in + # _buildServeCmd for the UI launch path. + env_path = (env_cfg.get("env_path") or "").rstrip("/") + env_type = (env_cfg.get("env_type") or env_cfg.get("env") or "").lower() + if env_type == "venv" and env_path: + venv_bin = f"{env_path}/bin" + # Match the FIRST shell-token: skip leading KEY=VAL env-var prefixes + # (CUDA_VISIBLE_DEVICES=… VLLM_USE_FLASHINFER_SAMPLER=…) before the binary. 
+ import re as _re3 + tokens = cmd.split() + idx = 0 + env_re = _re3.compile(r"^[A-Za-z_][A-Za-z0-9_]*=") + while idx < len(tokens) and env_re.match(tokens[idx]): + idx += 1 + if idx < len(tokens): + head = tokens[idx] + if head in ("vllm", "python3", "python"): + tokens[idx] = f"{venv_bin}/{head}" + cmd = " ".join(tokens) + payload["cmd"] = cmd if env_cfg.get("env_prefix"): payload["env_prefix"] = env_cfg["env_prefix"] if env_cfg.get("gpus"): payload["gpus"] = env_cfg["gpus"] if env_cfg.get("hf_token"): payload["hf_token"] = env_cfg["hf_token"] @@ -2949,7 +3097,7 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict: if env_cfg.get("ssh_port"): payload["ssh_port"] = env_cfg["ssh_port"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/serve", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/serve", json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -2960,7 +3108,19 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict: ) note = "" if registered else " (state-write failed — task may not show in UI)" return {"output": f"Serving {repo_id} (session: {sid}){note}", "session_id": sid, "exit_code": 0} - return {"error": data.get("error", "Serve failed"), "exit_code": 1} + # FastAPI HTTPException puts the message under `detail`, not `error`. + # Surface BOTH so the agent sees "Invalid characters in cmd" (from + # _validate_serve_cmd rejecting `&&`/`source`/`cd`) instead of + # the generic "Serve failed", which leaves it with nothing to act on. + err_msg = data.get("error") or data.get("detail") or "Serve failed" + hint = "" + if isinstance(err_msg, str) and "cmd" in err_msg.lower(): + hint = (" — the cmd must START with an allowlisted binary " + "(vllm, python3, llama-server, ollama, sglang, lmdeploy, node, npx). " + "Do NOT prefix with `cd …`, `source …`, or chain with `&&`. " + "env_prefix (e.g. 
`source ~/qwen35-env/bin/activate`) is added " + "automatically from the host's saved venv settings.") + return {"error": f"{err_msg}{hint}", "exit_code": 1} except Exception as e: return {"error": str(e), "exit_code": 1} @@ -2977,7 +3137,7 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di cookbook_tasks: List[Dict[str, Any]] = [] try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/tasks/status", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/tasks/status", headers=_internal_headers()) cookbook_tasks = (resp.json() or {}).get("tasks") or [] except Exception as e: @@ -3004,13 +3164,31 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di "exit_code": 0, } + # Sort so the agent sees what's actually LIVE first. Stopped/error/ + # completed tasks are mostly historical noise — they shouldn't lead + # the list when something is genuinely serving. + _ORDER = { + "ready": 0, "running": 1, "loading": 1, "warming": 1, + "queued": 2, "starting": 2, + "error": 5, "crashed": 5, "failed": 5, + "stopped": 6, "killed": 6, "cancelled": 6, "canceled": 6, + "done": 7, "completed": 7, "finished": 7, + } + def _rank(t: Dict[str, Any]) -> int: + phase = (t.get("phase") or t.get("status") or "unknown").lower() + return _ORDER.get(phase, 3) + merged.sort(key=_rank) + cb_n = len(cookbook_tasks) ext_n = len(external) + live_n = sum(1 for t in merged if _rank(t) <= 2) header = [] if cb_n: header.append(f"{cb_n} cookbook-tracked") if ext_n: header.append(f"{ext_n} external") + if live_n: + header.insert(0, f"{live_n} LIVE") lines = [f"Running: {', '.join(header)}."] for t in merged: phase = t.get("phase") or t.get("status", "unknown") @@ -3037,8 +3215,20 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di if t.get("status") == "error" and t.get("output_tail"): tail = str(t.get("output_tail") or "").strip() if tail: + # Prefer 
a window around a Python traceback if one exists, + # falling back to the last 30 lines. The previous 6-line + # tail showed only the post-crash bash prompt / neofetch + # banner ("Locale: C / Ubuntu_Odysseus ❯") — useless for + # diagnosis. The traceback we want is usually 50-200 lines + # earlier in the buffer. + _tail_lines = tail.splitlines() + _shown = _tail_lines[-30:] + for _i, _ln in enumerate(_tail_lines): + if "Traceback (most recent call last)" in _ln or "ERROR" in _ln or "Error:" in _ln: + _shown = _tail_lines[_i:_i + 40] + break lines.append(" recent log:") - for line in tail.splitlines()[-6:]: + for line in _shown: lines.append(f" {line[:220]}") if t.get("external") and t.get("cmdline_preview"): lines.append(f" cmd: {t['cmdline_preview']}") @@ -3066,7 +3256,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", state: Dict[str, Any] = {} try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = resp.json() or {} except Exception as e: logger.debug(f"cookbook state lookup failed for {session_id}: {e}") @@ -3095,7 +3285,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + resp = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": cmd}, headers=headers) if resp.status_code >= 400: return {"error": f"shell/exec returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1} @@ -3116,7 +3306,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", try: matched["status"] = "stopped" async with httpx.AsyncClient(timeout=10) as client: - await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, 
headers=headers) except Exception as e: logger.debug(f"failed to mark {session_id} stopped in state: {e}") @@ -3144,12 +3334,131 @@ async def do_stop_served_model(content: str, owner: Optional[str] = None) -> Dic ) +async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dict: + """Capture the last N lines of a cookbook task's tmux pane — remote-aware. + + Used by the agent to debug a failed/stuck serve: list_served_models tells + you the task is `crashed`, this tool returns the actual stderr/traceback + so the agent can match it against a known fix (compute_89 nvcc mismatch, + flashinfer version mismatch, OOM, missing kernels, etc.) and decide + whether to relaunch via serve_model with new flags. + """ + import httpx + import shlex + try: + args = _parse_tool_args(content) + except ValueError: + return {"error": "Invalid JSON arguments", "exit_code": 1} + session_id = (args.get("session_id") or "").strip() + if not session_id: + return {"error": "session_id is required (from list_served_models)", "exit_code": 1} + import re as _re + if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id): + return {"error": "Invalid session_id format", "exit_code": 1} + try: + tail = int(args.get("tail") or 400) + except (TypeError, ValueError): + tail = 400 + tail = max(20, min(tail, 4000)) + headers = _internal_headers() + remote = (args.get("remote_host") or args.get("host") or "").strip() + sport = (args.get("ssh_port") or "").strip() + # Resolve host from cookbook state if caller didn't pass one — same + # lookup _cookbook_kill_session uses. 
+ if not remote: + state: Dict[str, Any] = {} + try: + async with httpx.AsyncClient(timeout=10) as client: + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) + state = resp.json() or {} + except Exception as e: + logger.debug(f"cookbook state lookup failed for {session_id}: {e}") + if isinstance(state, dict): + for t in (state.get("tasks") or []): + if isinstance(t, dict) and (t.get("sessionId") == session_id or t.get("id") == session_id): + remote = t.get("remoteHost") or "" + if not sport: + sport = t.get("sshPort") or "" + break + # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the + # live tmux pane. The pane is what the user would see scrolling on + # their screen — including the post-crash neofetch banner and the + # idle bash prompt that overwrites the actual traceback the moment + # vllm exits. The log file is the raw stdout/stderr of the wrapped + # process and survives the crash unchanged. We only fall back to + # the pane when the log file doesn't exist (older sessions launched + # before the tmux+tee wrapper was added). 
+ log_path = f"/tmp/odysseus-tmux/{session_id}.log" + pane_inner = f"tmux capture-pane -t {shlex.quote(session_id)} -p -S -{tail} 2>/dev/null" + file_inner = f"tail -n {tail} {shlex.quote(log_path)} 2>/dev/null" + inner = ( + f"if [ -s {shlex.quote(log_path)} ]; then {file_inner}; " + f"else {pane_inner}; fi" + ) + if remote: + _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else "" + cmd = ( + f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no " + f"{_pf}{shlex.quote(remote)} {shlex.quote(inner)}" + ) + host_label = remote + else: + cmd = inner + host_label = "local" + try: + async with httpx.AsyncClient(timeout=20) as client: + resp = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", + json={"command": cmd}, headers=headers) + if resp.status_code >= 400: + return {"error": f"shell/exec returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1} + data = resp.json() if resp.content else {} + output_text = (data.get("stdout") or "").strip() + stderr_text = (data.get("stderr") or "").strip() + rc = data.get("exit_code") + if rc not in (None, 0) and not output_text: + already_gone = any(s in (stderr_text or "").lower() for s in ("no server running", "can't find session", "session not found")) + if already_gone: + return {"output": f"Tmux session {session_id} on {host_label} is gone (task already exited).", "exit_code": 0, "session_id": session_id, "host": host_label} + return {"error": f"capture-pane failed on {host_label}: {stderr_text or f'exit {rc}'}", "exit_code": 1} + # Dedupe download-progress noise. A 100-shard HF download produces + # tens of thousands of `model-NN-of-MM.safetensors: 91%|...` lines + # that all look the same to the agent and drown the actual error. + # Keep only one sample per (file, decile-percent) bucket. 
+ import re as _re2 + lines = output_text.splitlines() + dedup_lines = [] + seen_progress = set() + progress_re = _re2.compile(r"^([\w./\-]+):\s+(\d+)%") + for ln in lines: + m = progress_re.match(ln.strip()) + if m: + key = (m.group(1), int(m.group(2)) // 10) # bucket by 10% + if key in seen_progress: + continue + seen_progress.add(key) + dedup_lines.append(ln) + output_text = "\n".join(dedup_lines) + # Hard cap so the agent doesn't blow its token budget. + MAX_CHARS = 8000 + if len(output_text) > MAX_CHARS: + output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:] + return { + "output": output_text or "(empty pane)", + "session_id": session_id, + "host": host_label, + "tail_lines": tail, + "exit_code": 0, + } + except Exception as e: + return {"error": str(e), "exit_code": 1} + + async def do_list_downloads(content: str, owner: Optional[str] = None) -> Dict: """List in-flight model downloads (filters /api/cookbook/tasks/status to type=download).""" import httpx try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/tasks/status", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/tasks/status", headers=_internal_headers()) data = resp.json() tasks = [t for t in data.get("tasks", []) if (t.get("type") or "").lower() == "download"] @@ -3200,7 +3509,7 @@ async def do_search_hf_models(content: str, owner: Optional[str] = None) -> Dict params["limit"] = str(limit) try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/hf-latest", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/hf-latest", params=params, headers=_internal_headers()) data = resp.json() models = data.get("models") if isinstance(data, dict) else data @@ -3266,7 +3575,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di check = f"tmux has-session -t {shlex.quote(sess)} 2>&1" try: async with httpx.AsyncClient(timeout=10) 
as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + r = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": check}, headers=headers) data = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} if r.status_code >= 400 or (data.get("exit_code") not in (None, 0)): @@ -3283,7 +3592,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di server_up = False try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + r = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": health_cmd}, headers=headers) body = (r.json() or {}).get("stdout", "") if r.headers.get("content-type", "").startswith("application/json") else "" server_up = '"data"' in body or '"object"' in body @@ -3294,7 +3603,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di # overwrite the whole file (that'd nuke presets). try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: return {"error": f"could not read cookbook state: {e}", "exit_code": 1} @@ -3330,7 +3639,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di state["tasks"] = tasks try: async with httpx.AsyncClient(timeout=10) as client: - await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, headers=headers) except Exception as e: return {"error": f"could not save cookbook state: {e}", "exit_code": 1} @@ -3407,7 +3716,7 @@ async def do_list_serve_presets(content: str, owner: Optional[str] = None) -> Di import httpx try: async with httpx.AsyncClient(timeout=10) as 
client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = resp.json() or {} except Exception as e: @@ -3455,7 +3764,7 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict: try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = resp.json() or {} except Exception as e: @@ -3499,7 +3808,7 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict: try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/serve", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/serve", json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -3516,38 +3825,133 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict: async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Dict: - """List models already cached locally (or on a remote host).""" + """List models already cached locally and/or on remote hosts. + + With no `host` arg, scans EVERY configured Cookbook server (and local) + and aggregates — so the agent sees the full inventory in one call + instead of having to query each server individually. 
+ """ import httpx try: args = _parse_tool_args(content) if content.strip() else {} except ValueError: return {"error": "Invalid JSON arguments", "exit_code": 1} - params: Dict[str, str] = {} raw_host = (args.get("host") or "").strip() - host = await _resolve_cookbook_host(raw_host) if raw_host else "" - if host: - params["host"] = host - if args.get("model_dir"): - params["model_dir"] = args["model_dir"] - if args.get("ssh_port"): - params["ssh_port"] = str(args["ssh_port"]) - if args.get("platform"): - params["platform"] = args["platform"] + headers = _internal_headers() + + async def _scan_one(host_label: str, host_val: str, ssh_port: str = "", + platform: str = "", model_dir: str = "") -> list: + """Hit /api/model/cached for one host; tag each returned model with its source.""" + p: Dict[str, str] = {} + if host_val: + p["host"] = host_val + # Caller-provided override beats per-server config beats nothing. + if args.get("model_dir"): + p["model_dir"] = args["model_dir"] + elif model_dir: + p["model_dir"] = model_dir + if ssh_port: + p["ssh_port"] = ssh_port + elif args.get("ssh_port"): + p["ssh_port"] = str(args["ssh_port"]) + if platform: + p["platform"] = platform + elif args.get("platform"): + p["platform"] = args["platform"] + try: + async with httpx.AsyncClient(timeout=60) as client: + resp = await client.get(f"{_INTERNAL_BASE}/api/model/cached", + params=p, headers=headers) + data = resp.json() + ms = data.get("models", []) if isinstance(data, dict) else (data or []) + for m in ms: + m["host"] = host_label or "local" + return ms or [] + except Exception as e: + logger.debug(f"list_cached_models scan({host_label}) failed: {e}") + return [] + + # When the caller specifies a host explicitly, scan only that one (old behaviour). + # Otherwise iterate every configured server + local so the agent doesn't + # have to repeat the call per server. 
try: - async with httpx.AsyncClient(timeout=60) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/model/cached", - params=params, headers=_internal_headers()) - data = resp.json() - models = data.get("models", []) if isinstance(data, dict) else data + # Pull configured servers from cookbook state (used for resolving + # modelDirs both when caller specifies a host and when we scan all). + servers: list = [] + try: + async with httpx.AsyncClient(timeout=10) as client: + st = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) + st_data = st.json() if st.headers.get("content-type", "").startswith("application/json") else {} + servers = (st_data.get("env", {}) or {}).get("servers") or [] + except Exception as e: + logger.debug(f"server list fetch failed: {e}") + st_data = {} + + def _dirs_for(server_record: Dict[str, Any]) -> str: + """Comma-joined modelDirs from a saved server record (Settings). + + Filters out the HF cache (~/.cache/huggingface/hub) — the backend + scan script always scans it by default, so re-passing it as an + extra model_dir is redundant AND confuses some path-handling + edge cases where the extra dir suppresses the deeper scan. + We only need to forward the NON-default dirs (e.g. /mnt/HADES/models). + """ + mds = server_record.get("modelDirs") if isinstance(server_record, dict) else None + HF_DEFAULTS = {"~/.cache/huggingface/hub", "~/.cache/huggingface"} + if isinstance(mds, list): + extras = [d for d in mds if isinstance(d, str) and d.strip() and d.strip() not in HF_DEFAULTS] + return ",".join(extras) + if isinstance(mds, str) and mds.strip() not in HF_DEFAULTS: + return mds + return "" + + if raw_host: + host = await _resolve_cookbook_host(raw_host) + # Find this host's saved record so its modelDirs apply too. 
+ srv = next( + (s for s in servers if isinstance(s, dict) + and (s.get("name") == raw_host or s.get("host") == host or s.get("host") == raw_host)), + {}, + ) + models = await _scan_one(raw_host, host, model_dir=_dirs_for(srv)) + else: + # Always include local. Local's saved record is the one with no host. + local_srv = next((s for s in servers if isinstance(s, dict) and not (s.get("host") or "").strip()), {}) + scans: list = [_scan_one("local", "", model_dir=_dirs_for(local_srv))] + for s in servers: + if not isinstance(s, dict): + continue + name = s.get("name") or s.get("host") + host_val = s.get("host") or "" + if not host_val: + continue + scans.append(_scan_one( + name, + host_val, + ssh_port=str(s.get("port") or ""), + platform=s.get("platform") or "", + model_dir=_dirs_for(s), + )) + results = await asyncio.gather(*scans, return_exceptions=False) + # Dedupe by (host, repo_id) — same model could appear in both HF cache + Ollama list. + seen = set() + models: list = [] + for batch in results: + for m in batch: + key = (m.get("host", ""), m.get("repo_id", "")) + if key in seen: + continue + seen.add(key) + models.append(m) if not models: - # Filesystem cache scans can miss models downloaded into the HF - # default cache when the server has no explicit model_dir configured. - # Still surface completed Cookbook downloads so the agent doesn't - # incorrectly assume a model is absent and re-download it. + # Cache scans can miss models downloaded into the HF default cache + # when the server has no explicit model_dir configured. Surface + # completed Cookbook download tasks so the agent doesn't conclude + # a model is absent and re-download it. 
downloaded = [] try: async with httpx.AsyncClient(timeout=10) as client: - st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=_internal_headers()) + st = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = st.json() if st.headers.get("content-type", "").startswith("application/json") else {} for t in (state.get("tasks") or []): if not isinstance(t, dict) or t.get("type") != "download": @@ -3555,27 +3959,44 @@ async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Di if (t.get("status") or "").lower() not in {"done", "completed"}: continue task_host = t.get("remoteHost") or (t.get("payload") or {}).get("remote_host") or "" - if host and task_host != host: + if raw_host and task_host != raw_host: continue repo = t.get("modelId") or t.get("repoId") or (t.get("payload") or {}).get("repo_id") or t.get("name") if repo and repo not in downloaded: downloaded.append(repo) except Exception: downloaded = [] + host_str = f" on {raw_host}" if raw_host else "" if downloaded: - host_str = f" on {raw_host or host}" if (raw_host or host) else "" lines = [f"No cache paths were detected{host_str}, but Cookbook has completed download task(s):"] lines.extend(f"- {repo} — downloaded via Cookbook task" for repo in downloaded) return {"output": "\n".join(lines), "models": [{"repo_id": repo, "source": "cookbook_task"} for repo in downloaded], "exit_code": 0} - host_str = f" on {raw_host or host}" if (raw_host or host) else "" return {"output": f"No cached models found{host_str}.", "exit_code": 0} - lines = [f"{len(models)} cached model(s):"] - for m in models: - name = m.get("repo_id", "?") - sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "") - inc = " (incomplete)" if m.get("has_incomplete") else "" - kind = " [diffusion]" if m.get("is_diffusion") else "" - lines.append(f"- {name}{kind} — {sz}{inc}") + # Multi-host scan: group by host so the agent sees inventory per 
server. + # Single-host scan: flat list (matches old output shape). + if raw_host: + lines = [f"{len(models)} cached model(s) on {raw_host}:"] + for m in models: + name = m.get("repo_id", "?") + sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "") + inc = " (incomplete)" if m.get("has_incomplete") else "" + kind = " [diffusion]" if m.get("is_diffusion") else "" + lines.append(f"- {name}{kind} — {sz}{inc}") + else: + from collections import defaultdict as _dd + by_host = _dd(list) + for m in models: + by_host[m.get("host", "local")].append(m) + lines = [f"{len(models)} cached model(s) across {len(by_host)} server(s):"] + for host_name in sorted(by_host.keys()): + lines.append(f"\n[{host_name}]") + for m in by_host[host_name]: + name = m.get("repo_id", "?") + sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "") + inc = " (incomplete)" if m.get("has_incomplete") else "" + kind = " [diffusion]" if m.get("is_diffusion") else "" + backend = f" ({m.get('backend')})" if m.get("backend") else "" + lines.append(f"- {name}{kind}{backend} — {sz}{inc}") return {"output": "\n".join(lines), "models": models, "exit_code": 0} except Exception as e: return {"error": str(e), "exit_code": 1} @@ -3601,7 +4022,7 @@ async def do_edit_image(content: str, owner: Optional[str] = None) -> Dict: payload["scale"] = args["scale"] try: async with httpx.AsyncClient(timeout=120) as client: - resp = await client.post(f"http://localhost:7000/api/gallery/{action}", json=payload) + resp = await client.post(f"{_INTERNAL_BASE}/api/gallery/{action}", json=payload) data = resp.json() if data.get("success") or data.get("id"): return {"output": f"Image edited ({action}). 
New image ID: {data.get('id', '?')}", "exit_code": 0} @@ -3626,7 +4047,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict: args = {} action = (args.get("action") or "list").lower() rid = (args.get("id") or args.get("session_id") or args.get("research_id") or "").strip() - data_dir = _Path("data/deep_research") + data_dir = _Path(DEEP_RESEARCH_DIR) # SECURITY: the research id is interpolated straight into a filesystem # path (data/deep_research/.json) for read AND delete. Without this @@ -3717,7 +4138,7 @@ async def do_trigger_research(content: str, owner: Optional[str] = None) -> Dict payload["search_provider"] = args["search_provider"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/research/start", + resp = await client.post(f"{_INTERNAL_BASE}/api/research/start", json=payload, headers=_internal_headers(owner)) if resp.status_code >= 400: return {"error": f"research/start returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1} @@ -3777,7 +4198,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict: async with httpx.AsyncClient(timeout=30) as client: # 2. 
Email history (sent/received) try: - resp = await client.get("http://localhost:7000/api/email/resolve-contact", params={"name": name}) + resp = await client.get(f"{_INTERNAL_BASE}/api/email/resolve-contact", params={"name": name}) if resp.status_code == 200: for c in (resp.json().get("contacts") or []): email = (c.get("email") or "").strip().lower() @@ -3871,7 +4292,7 @@ async def do_manage_contact(content: str, owner: Optional[str] = None) -> Dict: def _load_vault_config() -> Dict: """Load Vaultwarden config from data/vault.json.""" from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) if p.exists(): try: return json.loads(p.read_text(encoding="utf-8")) @@ -4013,7 +4434,9 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict: if not master_password: return {"error": "master_password is required", "exit_code": 1} - stdout, stderr, rc = await _run_bw(["unlock", master_password, "--raw"]) + # Do not pass the master password as an argv element. Local process lists + # can expose argv to other users; stdin keeps the secret out of `ps`. 
+ stdout, stderr, rc = await _run_bw(["unlock", "--raw"], input_text=master_password + "\n") if rc != 0: return {"error": f"Unlock failed: {stderr[:300]}", "exit_code": 1} @@ -4023,7 +4446,7 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict: # Save session to vault.json from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) cfg = {} if p.exists(): try: diff --git a/src/tool_index.py b/src/tool_index.py index f8e8faef7..3f8010801 100644 --- a/src/tool_index.py +++ b/src/tool_index.py @@ -12,6 +12,14 @@ import re import time from typing import Dict, List, Optional, Set +from src.embedding_lanes import ( + LANE_CUSTOM, + LANE_FASTEMBED, + build_embedding_lanes, + dedupe_results, + migrate_legacy_collection, +) + try: import numpy as np except ImportError: @@ -20,20 +28,20 @@ except ImportError: logger = logging.getLogger(__name__) # Tools that are ALWAYS included regardless of retrieval results. -# These are the most commonly needed and should never be missing. +# Keep this deliberately tiny. Domain tools (web, documents, email, +# cookbook/model serving, files, settings, etc.) are injected by retrieval or +# keyword intent so a trivial agent prompt like "test" does not carry every +# domain's schemas and rules. ALWAYS_AVAILABLE = frozenset({ - "bash", "python", "web_search", "web_fetch", "read_file", - "api_call", # For configured integrations (Miniflux, Gitea, Linkding, etc.) - # The two genuinely AMBIENT cookbook tools — "what's running" and - # "kill it" can be asked any time without prior cookbook context, - # and need to survive typos. The other cookbook tools (downloads, - # presets, serve, cached, servers) are CONTEXTUAL — they fire via - # keyword hints when the user is actually talking about cookbook. - # Keeping the always-on set small leaves room in the ~16-tool - # budget for manage_tasks / manage_calendar / etc. 
- "list_served_models", "stop_served_model", - # Generic API loopback — the catch-all when no named tool fits. - "app_api", + # Memory is ambient — "remember this" can follow any message regardless + # of topic. Without this, RAG drops it and the agent falls back to + # app_api /api/memory/add which fails with 422 on first attempt. + "manage_memory", + # Ask the user a multiple-choice question for a decision/clarification. + # Always reachable so the agent can pause and ask at any point. + "ask_user", + # Write back to the active plan (tick steps done / revise) during execution. + "update_plan", }) # Tools that the Personal Assistant always has access to during scheduled @@ -59,13 +67,17 @@ COLLECTION_NAME = "odysseus_tool_index" # Each tool gets a searchable description that helps retrieval. # These are richer than the system prompt one-liners — they're for embedding. BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { - "bash": "Run shell commands on the server. Install packages, check files, git operations, curl, system info, process management, networking.", - "python": "Execute Python code for computation, data processing, math, scripting, parsing, API calls. Not for writing code for the user.", - "web_search": "Quick single web lookup for a fact, current event, or doc mid-task. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", + "bash": "Run shell commands on the server. Install packages, check files, git operations, system info, and process management. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "python": "Execute Python code for computation, data processing, math, scripting, and parsing. Not for writing code for the user. 
Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "web_search": "Quick single web lookup for a fact, current event, latest/current information, or doc mid-task. Use this instead of bash/curl/python/requests for web searches. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", "web_fetch": "Fetch and read the text content of a specific URL/website the user names (e.g. 'check example.com', 'open this link'). Use when you have a concrete URL; for open-ended lookups use web_search instead.", - "read_file": "Read a file from disk and return its contents. View source code, config files, logs.", - "write_file": "Write content to a file on disk. Create new files, save output, update configs.", - "create_document": "Create a new document in the editor panel. For code, articles, text content longer than 15 lines. Specify title, language, and content.", + "read_file": "Read a file from disk and return its contents. View source code, config files, logs. Supports an optional line range (offset/limit) for large files.", + "grep": "Search file CONTENTS for a regex across a directory tree (ripgrep-backed, honours .gitignore). Returns file:line:match. Use to find where code/symbols/strings live — prefer over bash grep.", + "glob": "Find FILES by glob pattern (e.g. '**/*.py'), newest first. Use to locate files by name/extension — prefer over bash find/ls.", + "ls": "List a directory's entries (folders then files with sizes). Use to see what's in a folder — prefer over bash ls.", + "write_file": "Write/create or fully rewrite a file ON DISK (source code, configs, project files). Use for new files or full rewrites — NOT create_document (editor panel) and NOT a bash heredoc.", + "edit_file": "Edit an existing file ON DISK by exact string replacement (fix a bug, change a function). Shows a diff. 
The tool for changing files on disk — NOT edit_document (editor panel) and NOT bash sed/heredoc.", + "create_document": "Create a new document in the editor panel. For code, articles, text content longer than 15 lines, unless an already-open document/email draft is the obvious target. If an email compose draft is open, edit that draft instead of creating another document.", "edit_document": "Preferred tool for editing an existing document — targeted find-and-replace. Use for any small change: add a function, fix a bug, tweak a section, rename things.", "update_document": "Replace the entire active document content. ONLY for full rewrites (>50% changed). Do not use for small edits — use edit_document instead.", "suggest_document": "Suggest changes to the active document with explanations. For code review, proofreading, feedback requests.", @@ -88,7 +100,9 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "create_session": "Create a new chat with a name and model.", "list_sessions": "List all chats with their metadata (the UI calls these 'chats'). Use for 'list my chats', 'rename all my chats' (list first, then manage_session to rename each).", "send_to_session": "Send a message to another chat. Cross-chat communication.", - "search_chats": "Search through chat history across all sessions.", + "search_chats": "Search past session transcripts across chats.", + "ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. 
Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.", + "update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. No effect when there is no active plan.", "ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel `. Use `open_email_reply reply` to open an email reply draft document without sending. Also switches between chat/agent modes, changes the current model, and applies/creates themes.", "list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.", "list_emails": "List emails for a folder/account, newest first, including read messages by default. Shows subject, sender, date, UID, account, and AI summary. Check inbox, find emails needing replies. Supports account from list_email_accounts for Gmail/work/custom mailboxes. For last/latest/newest email, use max_results=1 and unread_only=false.", @@ -102,11 +116,12 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. 
Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.", "manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. Do not use for user identity facts like 'my name is '; those are memory.", "manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.", - "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Use ISO datetimes; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.", + "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.", "download_model": "Download a HuggingFace model to a local or remote server. 
Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.", - "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model --port 8100. After launch, call list_served_models for readiness/errors and retry suggestions.", + "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. cmd MUST start with the binary directly — e.g. `vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --port 8003 --tensor-parallel-size 8 …`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||` — those get rejected by the validator. The venv activation (env_prefix) and CUDA env are added automatically from the target host's saved settings. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model --port 8100. After launch, call list_served_models for readiness/errors and retry suggestions. If serve_model fails with 'Invalid characters in cmd', simplify to the bare binary + args.", "list_served_models": "List currently running model servers in the Cookbook — shows status (loading, ready, idle, error), model name, port, throughput, and serve failure diagnosis/retry suggestions. Use when the user asks 'what's running', 'show my cookbook', 'which models are up', 'what's serving'.", "stop_served_model": "Stop a running model server in the Cookbook by session ID or model name. Use when the user says 'kill my cookbook', 'stop the model', 'kill the serve', 'shut down vLLM', 'cancel the running model'.", + "tail_serve_output": "Read the actual tmux stderr/traceback of a cookbook serve/download task. Use to debug WHY a task is `crashed`/`error` (compute_89 nvcc mismatch, OOM, missing kernels, wrong attention backend, etc.) so you can call serve_model with adjusted flags. 
Pass session_id from list_served_models; tail defaults to 300, bump if the error references 'see root cause above'.", "list_downloads": "List in-progress HuggingFace model downloads in the Cookbook. Shows model name, phase, percent, session ID. Use for 'what's downloading', 'show my downloads', 'check download progress'.", "cancel_download": "Cancel an in-progress model download by tmux session ID. Use for 'cancel the download', 'stop downloading X', 'kill the download'. Call list_downloads first to get the session_id.", "search_hf_models": "Search HuggingFace for models matching a query (e.g. 'qwen 8B', 'flux', 'llama-3 instruct'). Returns ranked repo IDs with sizes and download counts. Use for 'find a model', 'search huggingface for X', 'what models are there for Y'.", @@ -115,7 +130,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "serve_preset": "Launch a saved Cookbook serve preset by name. Reuses the exact tmux command + host the user already saved. Use for 'run stable diffusion 3.5', 'serve vllm-qwen', 'start the inpaint model' — preset-name matches the user's UI labels.", "adopt_served_model": "Register an existing tmux model server (one started manually or outside the cookbook flow) into Cookbook tracking AND add it as a chat endpoint. Use when the user (or a previous turn) launched something via ssh+tmux and now wants it visible in the UI, stoppable via stop_served_model, and usable in the model picker.", "list_cookbook_servers": "List the cookbook's configured servers (remote GPU boxes + local) and which is the current default. Use this BEFORE download_model/serve_model when the user didn't name a host — to decide where to run, or to ask the user which server when ambiguous. Downloads/serves default to the cookbook's selected server, NOT localhost.", - "app_api": "Generic loopback to ANY Odysseus internal endpoint. Use this when the user wants something the UI can do but there's no named tool for it. 
Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — every UI button hits some /api/* endpoint and you can hit it too. action='endpoints' with filter= lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.", + "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter= lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. 
To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.", "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.", "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.", } @@ -125,32 +140,30 @@ class ToolIndex: """ChromaDB-backed tool index for RAG-based tool selection.""" def __init__(self): - from src.chroma_client import get_chroma_client - from src.embeddings import get_embedding_client - - self._embedder = get_embedding_client() - if not self._embedder: - raise RuntimeError("No embedding client available") - - client = get_chroma_client() - self._collection = client.get_or_create_collection( - name=COLLECTION_NAME, - metadata={"hnsw:space": "cosine"}, + self._lanes = build_embedding_lanes(COLLECTION_NAME) + if not self._lanes: + raise RuntimeError("No embedding lanes available") + self._embedder = self._lanes[0].client + self._collection = next( + (lane.collection for lane in self._lanes if lane.name == LANE_FASTEMBED), + self._lanes[0].collection, ) + migrate_legacy_collection(COLLECTION_NAME, self._lanes) self._fingerprint = "" self._mcp_generation = -1 self._healthy = True - logger.info("ToolIndex initialized") + logger.info("ToolIndex initialized (lanes=%s)", [lane.name for lane in self._lanes]) @property def healthy(self): return self._healthy def _embed(self, texts: List[str]) -> List[List[float]]: - vecs = self._embedder.encode(texts, normalize_embeddings=True) + if not self._lanes: + return [] + vecs = self._lanes[0].encode(texts) if np is not None: return np.array(vecs, dtype=np.float32).tolist() - # Fallback without numpy return [list(v) for v in 
vecs] def index_builtin_tools(self): @@ -171,23 +184,31 @@ class ToolIndex: # registry (e.g. removed tools like the old vault_* set). # Without this, upsert leaves them in place and RAG keeps # surfacing tools that no longer exist. - try: - existing = self._collection.get(where={"tool_type": "builtin"}) - existing_ids = (existing or {}).get("ids") or [] - stale = [i for i in existing_ids if i not in set(ids)] - if stale: - self._collection.delete(ids=stale) - logger.info(f"Pruned {len(stale)} stale builtin tool entries from index") - except Exception as e: - logger.debug(f"Stale-pruning skipped: {e}") + indexed = False + for lane in self._lanes: + try: + existing = lane.collection.get(where={"tool_type": "builtin"}) + existing_ids = (existing or {}).get("ids") or [] + stale = [i for i in existing_ids if i not in set(ids)] + if stale: + lane.collection.delete(ids=stale) + logger.info(f"Pruned {len(stale)} stale builtin tool entries from {lane.name} index") + except Exception as e: + logger.debug(f"Stale-pruning skipped for {lane.name}: {e}") - embeddings = self._embed(docs) - self._collection.upsert( - ids=ids, - documents=docs, - embeddings=embeddings, - metadatas=metadatas, - ) + try: + lane.collection.upsert( + ids=ids, + documents=docs, + embeddings=lane.encode(docs), + metadatas=metadatas, + ) + indexed = True + except Exception as e: + logger.warning("Builtin tool indexing failed in %s lane: %s", lane.name, e) + if not indexed: + self._healthy = False + raise RuntimeError("Builtin tool indexing failed in all embedding lanes") self._fingerprint = hashlib.sha256( ",".join(sorted(BUILTIN_TOOL_DESCRIPTIONS.keys())).encode() ).hexdigest() @@ -202,15 +223,15 @@ class ToolIndex: gen = getattr(mcp_mgr, '_generation', 0) if gen == self._mcp_generation: return - self._mcp_generation = gen # Remove old MCP entries - try: - existing = self._collection.get(where={"tool_type": "mcp"}) - if existing and existing["ids"]: - self._collection.delete(ids=existing["ids"]) - except 
Exception: - pass + for lane in self._lanes: + try: + existing = lane.collection.get(where={"tool_type": "mcp"}) + if existing and existing["ids"]: + lane.collection.delete(ids=existing["ids"]) + except Exception: + pass # Get current MCP tools try: @@ -219,6 +240,7 @@ class ToolIndex: all_tools = "" if not all_tools: + self._mcp_generation = gen return # Parse MCP tool descriptions from the prompt text @@ -246,39 +268,59 @@ class ToolIndex: metadatas.append({"tool_name": name, "tool_type": "mcp"}) if not docs: + self._mcp_generation = gen return - embeddings = self._embed(docs) - self._collection.upsert( - ids=ids, - documents=docs, - embeddings=embeddings, - metadatas=metadatas, - ) + indexed = False + for lane in self._lanes: + try: + lane.collection.upsert( + ids=ids, + documents=docs, + embeddings=lane.encode(docs), + metadatas=metadatas, + ) + indexed = True + except Exception as e: + logger.warning("MCP tool indexing failed in %s lane: %s", lane.name, e) + if not indexed: + logger.warning("MCP tool indexing failed in all embedding lanes") + return + self._mcp_generation = gen logger.info(f"Indexed {len(docs)} MCP tools") def retrieve(self, query: str, k: int = 8) -> List[str]: """Retrieve the top-K most relevant tool names for a query.""" - try: - query_embedding = self._embed([query]) - results = self._collection.query( - query_embeddings=query_embedding, - n_results=min(k, self._collection.count() or k), - include=["metadatas", "distances"], - ) - if not results or not results.get("metadatas"): - return [] - - tool_names = [] - for meta_list in results["metadatas"]: - for meta in meta_list: - name = meta.get("tool_name", "") - if name and name not in tool_names: - tool_names.append(name) - return tool_names - except Exception as e: - logger.warning(f"Tool retrieval failed: {e}") - return [] + rows = [] + lane_priority = {LANE_CUSTOM: 0, LANE_FASTEMBED: 1} + for lane in self._lanes: + try: + count = lane.count() + if count == 0: + continue + results = 
lane.collection.query( + query_embeddings=lane.encode([query]), + n_results=min(k, count), + include=["metadatas", "distances"], + ) + if not results or not results.get("metadatas"): + continue + distances = results.get("distances") or [] + for list_idx, meta_list in enumerate(results["metadatas"]): + distance_list = distances[list_idx] if list_idx < len(distances) else [] + for idx, meta in enumerate(meta_list): + name = meta.get("tool_name", "") + if name: + distance = distance_list[idx] if idx < len(distance_list) else 1.0 + rows.append({ + "tool_name": name, + "score": round(1.0 - distance, 4), + "embedding_lane": lane.name, + }) + except Exception as e: + logger.warning("Tool retrieval failed in %s lane: %s", lane.name, e) + rows.sort(key=lambda row: (-row["score"], lane_priority.get(row["embedding_lane"], 99))) + return [row["tool_name"] for row in dedupe_results(rows, id_key="tool_name", limit=k)] # Structural recurring-schedule intent. Typo-resilient (matches "every dya" # via "every "), and catches bare clock times ("at 7:30 am", "7am"). @@ -293,7 +335,11 @@ class ToolIndex: # Keyword hints: if the query mentions these words, force-include the tools. _KEYWORD_HINTS = { - frozenset({"email", "mail", "gmail", "googlemail", "message", "send", "reply", "inbox", "unread", "tell"}): + # NOTE: "tell" was removed from this set. It fired on any "tell me ..." + # request (e.g. "visit and tell me the title"), force-including the + # whole email toolset and crowding out the relevant tools — the model then + # believed it had only email tools and refused web/other tasks (#1707). 
+ frozenset({"email", "emails", "mail", "mails", "gmail", "googlemail", "message", "messages", "send", "reply", "replies", "inbox", "unread"}): {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"}, frozenset({"calendar", "event", "meeting", "schedule", "appointment"}): {"manage_calendar"}, @@ -357,14 +403,14 @@ class ToolIndex: # Document edit/update intent frozenset({"edit", "change", "fix", "rewrite", "update", "replace", "add a", "tweak", "modify", "rename", "paragraph", - "section", "line", "the doc", "the document", "in the doc"}): + "section", "line", "the doc", "the docs", "the document", "the documents", "in the doc", "in the docs", "in document"}): {"edit_document", "update_document", "create_document", "suggest_document"}, # Document deletion / management — include generic open/find/read/show # verbs + file/doc synonyms so "open my ", "find the ", "delete # " reach manage_documents even without the literal word "document". frozenset({"delete this doc", "delete the doc", "delete document", - "remove document", "remove the doc", "trash", "list documents", - "list docs", "all my docs", "my documents", "my docs", "my files", + "remove document", "remove the doc", "trash", "list document", "list documents", + "list doc", "list docs", "all my docs", "my document", "my documents", "my doc", "my docs", "my files", "open the", "open my", "open document", "open doc", "find the", "find my", "find document", "read the", "read my", "show me the", "show my", "the file", "my file", "the report", "the write-up", @@ -431,10 +477,14 @@ class ToolIndex: base = set(always_include or ALWAYS_AVAILABLE) retrieved = self.retrieve(query, k=k) base.update(retrieved) - # Keyword-based force-include for common intents + # Keyword-based force-include for common intents. 
Match on word + # boundaries, not raw substrings, so short hints like "fix", "line", + # "serve", "reply" or "unread" don't fire inside unrelated words + # ("prefix", "deadline"/"online", "observe"/"reserve", "replying", + # "unreadable"). Same word-boundary matching used in topic_analyzer. ql = query.lower() for keywords, tools in self._KEYWORD_HINTS.items(): - if any(kw in ql for kw in keywords): + if any(re.search(rf"\b{re.escape(kw)}\b", ql) for kw in keywords): base.update(tools) # Structural scheduling-intent detection — typo-resilient (the literal # keyword "every day" misses "every dya"). Catches "every ", @@ -473,3 +523,10 @@ def get_tool_index() -> Optional[ToolIndex]: logger.warning(f"ToolIndex init failed (will retry in {_RETRY_INTERVAL}s): {e}") _tool_index = None return None + + +def reset_tool_index() -> None: + """Clear the singleton so embedding endpoint changes rebuild tool lanes.""" + global _tool_index, _last_attempt + _tool_index = None + _last_attempt = 0.0 diff --git a/src/tool_parsing.py b/src/tool_parsing.py index 6d7aae3e3..3f296c2e6 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -5,9 +5,10 @@ Regex-based parsing of tool invocations from LLM response text. Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style blocks. """ -import re +import ast import json import logging +import re from typing import List, Optional from src.agent_tools import ToolBlock, TOOL_TAGS @@ -69,6 +70,8 @@ _TOOL_CODE_RE = re.compile( # fullwidth (U+FF5C) and ascii '|' in any count. 
# Function names a model may "call" Python-style inside a ```python / ```bash
# fence, mapped to the canonical web tool they stand for.
_MISFENCED_WEB_TOOL_NAMES = {
    "web_search": "web_search",
    "websearch": "web_search",
    "google_search": "web_search",
    "google_search_retrieval": "web_search",
    "google_search_grounding": "web_search",
    "web_fetch": "web_fetch",
    "webfetch": "web_fetch",
    "fetch_url": "web_fetch",
}


# ---------------------------------------------------------------------------
# Parsing functions
# ---------------------------------------------------------------------------

def _string_from_literal(parsed) -> Optional[str]:
    """Return the string carried by an already-evaluated literal, or None.

    A ``str`` is returned stripped (possibly empty). A ``list`` yields its
    first non-blank string item, stripped. Any other value yields None.
    """
    if isinstance(parsed, str):
        return parsed.strip()
    if isinstance(parsed, list):
        for item in parsed:
            if isinstance(item, str) and item.strip():
                return item.strip()
    return None


def _literal_string(value) -> Optional[str]:
    """Return a string from a small literal AST node, or None.

    ``value`` is evaluated with ``ast.literal_eval``; anything that is not a
    plain literal (names, calls, starred args, ...) yields None.
    """
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError):
        return None
    return _string_from_literal(parsed)


def _parse_misfenced_web_lookup(content: str) -> Optional["ToolBlock"]:
    """Recover simple web_search/web_fetch calls wrapped in python/bash fences.

    Some local fenced-tool models write:

        ```python
        web_search("latest python release")
        ```

    That is an intended tool call, not Python code. Keep this intentionally
    narrow: only a single bare function call to a known web tool alias converts.

    Args:
        content: The text found inside the fence.

    Returns:
        A ``ToolBlock`` for ``web_search`` (plain query, or a JSON payload when
        ``time_filter``/``freshness``/``max_pages`` were given) or ``web_fetch``
        (plain URL); None when the content is anything else, so the caller can
        treat it as ordinary code.
    """
    try:
        module = ast.parse(content.strip(), mode="exec")
    except (SyntaxError, ValueError):
        # ValueError: older Pythons report NUL bytes in the source this way
        # instead of SyntaxError; model output is untrusted text.
        return None
    if len(module.body) != 1 or not isinstance(module.body[0], ast.Expr):
        return None
    call = module.body[0].value
    if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name):
        return None

    mapped = _MISFENCED_WEB_TOOL_NAMES.get(call.func.id.lower())
    if mapped is None:
        return None
    if len(call.args) > 1:
        return None

    args = {}
    if call.args:
        # The single positional argument is the URL for web_fetch and the
        # query for web_search.
        value = _literal_string(call.args[0])
        if not value:
            return None
        args["url" if mapped == "web_fetch" else "query"] = value

    allowed = {"query", "queries", "url", "time_filter", "freshness", "max_pages"}
    for keyword in call.keywords:
        # keyword.arg is None for **kwargs, which is rejected here as well.
        if keyword.arg not in allowed:
            return None
        key = "query" if keyword.arg == "queries" else keyword.arg
        try:
            parsed = ast.literal_eval(keyword.value)
        except (ValueError, SyntaxError, TypeError):
            return None
        text = _string_from_literal(parsed)
        if text is not None:
            args[key] = text
            continue
        # bool is a subclass of int; max_pages=True is not a page count.
        if key == "max_pages" and isinstance(parsed, int) and not isinstance(parsed, bool):
            args[key] = parsed
            continue
        return None

    if mapped == "web_search":
        query = args.get("query")
        if not query:
            return None
        payload = {"query": query}
        for key in ("time_filter", "freshness", "max_pages"):
            if key in args:
                payload[key] = args[key]
        if len(payload) == 1:
            # No extra options: keep the plain-string form.
            return ToolBlock("web_search", query)
        return ToolBlock("web_search", json.dumps(payload))

    url = args.get("url")
    if not url:
        return None
    return ToolBlock("web_fetch", url)
Supports multiple formats: @@ -333,6 +436,17 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: 3. XML-style / blocks 4. blocks (MiniMax-M2.5 style) 5. DeepSeek DSML markup (normalized to first) + + `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code + blocks) is not matched at all. Native function-calling models (GPT/Claude/ + Grok/Qwen3/DeepSeek-V, etc.) commonly write illustrative fenced examples in + prose; for those models we trust the structured tool_calls channel for real + invocations and treat a bare fence as display text rather than an action + (issue #3222). Patterns 2-5 — explicit [TOOL_CALL]///DSML + markup that leaked into content as text — stay fully active regardless, + since that markup is never an illustrative example and dropping it would + silently lose real calls (e.g. DeepSeek-V falling back to DSML when it + can't emit structured tool_calls). """ blocks = [] @@ -340,24 +454,31 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: # XML patterns below catch it. text = _normalize_dsml(text) - # Pattern 1: fenced code blocks - for m in _TOOL_BLOCK_RE.finditer(text): - tag = m.group(1).lower() - content = m.group(2).strip() - if not content: - continue - # If a code block's content is an XML call (some models wrap - # tool calls in ```python or ```xml fences), parse the invoke instead. - if ' XML call (some models wrap + # tool calls in ```python or ```xml fences), parse the invoke instead. + if ' markup, not literal code. Whether or + # not any call converted, never fall through to append the raw XML as + # a python/bash block — e.g. a hyphenated/namespaced tool name that + # _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code. 
+ continue + if tag in ("python", "bash"): + block = _parse_misfenced_web_lookup(content) if block: blocks.append(block) - invoked = True - if invoked: - continue - blocks.append(ToolBlock(tag, content)) + continue + blocks.append(ToolBlock(tag, content)) # Pattern 2: [TOOL_CALL] blocks (only if no fenced blocks found) if not blocks: @@ -391,12 +512,23 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: return blocks -def strip_tool_blocks(text: str) -> str: - """Remove executable tool blocks from text for clean display.""" +def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str: + """Remove executable tool blocks from text for clean display. + + `skip_fenced`: when True, fenced ```bash/```python/```json code blocks + (Pattern 1) are left intact instead of being stripped. This must mirror + whatever `skip_fenced` value `parse_tool_blocks` was called with for the + same response: if a fence wasn't executed as a tool call (because it's an + illustrative example from a native function-calling model), it shouldn't + vanish from the persisted/displayed text either — otherwise the example + streams once and then disappears on reload (issue #3222 follow-up). + Patterns 2-5 + DSML markup are always stripped, since that markup should + never reach the user regardless of whether it converted to a tool call. + """ # Normalize DSML first so its markup gets stripped by the # / removers below instead of leaking to the user. 
"""Per-turn tool policy composition for agent execution."""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from types import MappingProxyType
from typing import Iterable, Mapping, Optional, Set, Tuple


# Prompt text describing guide-only mode to the model.
GUIDE_ONLY_DIRECTIVE = (
    "## GUIDE-ONLY MODE - TOOL POLICY\n"
    "The latest user turn explicitly forbids tool use. Do not call tools, do not "
    "run shell commands, and do not inspect local files or the environment. "
    "Respond in normal text by guiding the user or asking them to paste the "
    "output they will produce locally."
)


# Static fallback list of native tool names. known_tool_names() extends it
# with whatever the schema / prompt / security modules declare at runtime.
_COMMON_TOOL_NAMES = {
    "api_call",
    "app_api",
    "archive_email",
    "ask_teacher",
    "ask_user",
    "bash",
    "bulk_email",
    "builtin_browser",
    "cancel_download",
    "chat_with_model",
    "create_document",
    "create_session",
    "delete_email",
    "download_model",
    "edit_document",
    "edit_file",
    "edit_image",
    "generate_image",
    "glob",
    "grep",
    "list_cached_models",
    "list_cookbook_servers",
    "list_downloads",
    "list_emails",
    "list_models",
    "list_serve_presets",
    "list_served_models",
    "list_sessions",
    "ls",
    "manage_calendar",
    "manage_contact",
    "manage_documents",
    "manage_endpoints",
    "manage_mcp",
    "manage_memory",
    "manage_notes",
    "manage_research",
    "manage_session",
    "manage_settings",
    "manage_skills",
    "manage_tasks",
    "manage_tokens",
    "manage_webhooks",
    "mark_email_read",
    "pipeline",
    "python",
    "read_email",
    "read_file",
    "reply_to_email",
    "resolve_contact",
    "search_chats",
    "search_hf_models",
    "send_email",
    "send_to_session",
    "serve_model",
    "serve_preset",
    "stop_served_model",
    "suggest_document",
    "trigger_research",
    "ui_control",
    "update_document",
    "update_plan",
    "vault_get",
    "vault_search",
    "vault_unlock",
    "web_fetch",
    "web_search",
    "write_file",
}


# (compiled pattern, reason) pairs; the first pattern that matches wins.
_GUIDE_ONLY_PATTERNS: Tuple[Tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(pattern, re.IGNORECASE), reason)
    for pattern, reason in (
        (r"\bguide[-\s]?only mode\b", "guide-only mode requested"),
        (r"\bno[-\s]?tools? mode\b", "no-tools mode requested"),
        (r"\bdo not use (?:any )?tools?\b", "user forbade tool use"),
        # Accept the ASCII apostrophe, the typographic one (U+2019) that
        # phones and macOS insert automatically, or none at all ("dont").
        (r"\bdon['\u2019]?t use (?:any )?tools?\b", "user forbade tool use"),
        (r"\bnot allowed to use (?:any )?tools?\b", "user forbade tool use"),
        (r"\bnot allowed to:?.{0,120}\buse (?:any )?tools?\b", "user forbade tool use"),
        (r"\bask (?:me )?(?:for confirmation )?before using tools?\b", "user requested confirmation before tools"),
    )
)


@dataclass(frozen=True)
class ToolPolicy:
    """Effective tool behavior for one agent turn."""

    # Tools whose invocation is refused.
    disabled_tools: frozenset[str] = frozenset()
    # Tools treated as blocked in addition to disabled_tools (see blocks()).
    hidden_tools: frozenset[str] = frozenset()
    # Per-tool explanation returned by reason_for().
    reasons: Mapping[str, str] = field(default_factory=dict)
    # "normal", or "guide_only" when the user forbade tools for this turn.
    mode: str = "normal"
    # When True every named tool is blocked, including ones not listed above.
    block_all_tool_calls: bool = False
    disable_mcp: bool = False

    def all_disabled_names(self) -> Set[str]:
        """Return a fresh set holding both disabled and hidden tool names."""
        return set(self.disabled_tools) | set(self.hidden_tools)

    def blocks(self, tool_name: Optional[str]) -> bool:
        """Return True when ``tool_name`` must not run this turn.

        An empty or missing name is never reported as blocked.
        """
        if not tool_name:
            return False
        return self.block_all_tool_calls or tool_name in self.disabled_tools or tool_name in self.hidden_tools

    def reason_for(self, tool_name: Optional[str]) -> str:
        """Return the explanation to show when ``tool_name`` is refused."""
        if tool_name and tool_name in self.reasons:
            return self.reasons[tool_name]
        if self.block_all_tool_calls and self.mode == "guide_only":
            return "Tool use is disabled for this guide-only turn."
        return "Tool use is disabled for this turn."


def detect_guide_only_turn(message: object) -> Optional[str]:
    """Return a reason when the latest user turn strongly requests no tools.

    Non-string or blank input returns None. Whitespace runs (including
    newlines) are collapsed to single spaces before matching, so a phrase
    split across lines is still recognised.
    """

    if not isinstance(message, str) or not message.strip():
        return None
    text = re.sub(r"\s+", " ", message.strip())
    for pattern, reason in _GUIDE_ONLY_PATTERNS:
        if pattern.search(text):
            return reason
    return None


def known_tool_names() -> Set[str]:
    """Best-effort set of native tool names for prompt hiding and denylisting.

    Starts from the static fallback list and adds names declared by the
    schema, prompt-section and plan-mode modules. Each source is optional:
    any failure while importing or reading one is deliberately ignored so the
    policy can still be built from the remaining sources.
    """

    names = set(_COMMON_TOOL_NAMES)
    try:
        from src.tool_schemas import FUNCTION_TOOL_SCHEMAS

        for schema in FUNCTION_TOOL_SCHEMAS:
            # Accept both the nested {"function": {"name": ...}} layout and a
            # flat {"name": ...} one.
            name = (schema.get("function") or {}).get("name") or schema.get("name")
            if name:
                names.add(name)
    except Exception:
        pass
    try:
        from src.agent_loop import TOOL_SECTIONS

        names.update(TOOL_SECTIONS.keys())
    except Exception:
        pass
    try:
        from src.tool_security import PLAN_MODE_READONLY_TOOLS, _PLAN_MODE_KNOWN_MUTATORS

        names.update(PLAN_MODE_READONLY_TOOLS)
        names.update(_PLAN_MODE_KNOWN_MUTATORS)
    except Exception:
        pass
    return names


def build_effective_tool_policy(
    *,
    disabled_tools: Optional[Iterable[str]] = None,
    last_user_message: object = "",
) -> ToolPolicy:
    """Compose the effective policy for one agent turn.

    Existing callers still provide the already-composed disabled-tool denylist.
    This function adds higher-level turn policy on top so enforcement is not
    delegated to prompt compliance.

    Args:
        disabled_tools: Names to deny for this request. Falsy entries are
            dropped. A bare string is treated as one tool name rather than
            being iterated character by character.
        last_user_message: Latest user turn; checked for a no-tools request.

    Returns:
        A guide-only ``ToolPolicy`` (every known tool disabled and hidden, all
        tool calls and MCP blocked) when the message forbids tool use,
        otherwise a normal policy carrying only the supplied denylist.
    """

    if isinstance(disabled_tools, str):
        # str is itself an Iterable[str]; without this "bash" would disable
        # the tools "b", "a", "s" and "h".
        disabled_tools = [disabled_tools]
    disabled = {str(t) for t in (disabled_tools or []) if t}
    hidden: Set[str] = set()
    reasons = {tool: "Tool is disabled for this request." for tool in disabled}

    guide_reason = detect_guide_only_turn(last_user_message)
    if guide_reason:
        all_tools = known_tool_names()
        disabled.update(all_tools)
        hidden.update(all_tools)
        # Known tools report the guide-only reason; explicitly disabled names
        # outside the known set keep their per-request reason.
        reasons.update({tool: f"{guide_reason}." for tool in all_tools})
        return ToolPolicy(
            disabled_tools=frozenset(disabled),
            hidden_tools=frozenset(hidden),
            reasons=MappingProxyType(dict(reasons)),
            mode="guide_only",
            block_all_tool_calls=True,
            disable_mcp=True,
        )

    return ToolPolicy(
        disabled_tools=frozenset(disabled),
        hidden_tools=frozenset(hidden),
        reasons=MappingProxyType(dict(reasons)),
    )
'*.py' (optional)"}, + "ignore_case": {"type": "boolean", "description": "Case-insensitive match (optional)"}, + "max_results": {"type": "integer", "description": "Max matches to return (optional)"} + }, + "required": ["pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "glob", + "description": "Find files by glob pattern (recursive), newest first. e.g. '**/*.py'. PREFER this over `bash find/ls` for locating files — confined to the allowed roots.", + "parameters": { + "type": "object", + "properties": { + "pattern": {"type": "string", "description": "Glob pattern, e.g. '**/*.ts' or 'src/**/test_*.py'"}, + "path": {"type": "string", "description": "Base directory (optional; defaults to the project root)"} + }, + "required": ["pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "ls", + "description": "List the entries of a directory (folders first, then files with sizes). PREFER this over `bash ls` — confined to the allowed roots.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "Directory to list (optional; defaults to the project root)"} + }, + "required": [] + } + } + }, { "type": "function", "function": { @@ -107,11 +156,28 @@ FUNCTION_TOOL_SCHEMAS = [ } } }, + { + "type": "function", + "function": { + "name": "edit_file", + "description": "Edit a file ON DISK by exact string replacement (home folder, project files, any real path like ~/sweden.txt or /path/to/file). This is the right tool for files on disk — NOT edit_document (that's for editor-panel documents). PREFER this over bash (sed/echo) — it shows a diff. old_string must match the file exactly and be unique (or set replace_all). 
Use write_file to create a new file.", + "parameters": { + "type": "object", + "properties": { + "path": {"type": "string", "description": "File path to edit"}, + "old_string": {"type": "string", "description": "Exact text to replace (must match the file, including indentation)"}, + "new_string": {"type": "string", "description": "Replacement text"}, + "replace_all": {"type": "boolean", "description": "Replace all occurrences instead of requiring a unique match"} + }, + "required": ["path", "old_string", "new_string"] + } + } + }, { "type": "function", "function": { "name": "create_document", - "description": "Create a new document in the editor panel. ALWAYS use this when the user asks to write, create, build, or generate code, scripts, programs, games, apps, or any substantial content (>15 lines). NEVER put large code blocks directly in chat — use this tool instead.", + "description": "Create a new document in the editor panel. Use this when the user asks to write, create, build, or generate code, scripts, programs, games, apps, or any substantial content (>15 lines) AND there is no already-open document/email draft that the request refers to. If an email compose draft is open, edit that draft instead of creating another document. NEVER put large code blocks directly in chat — use this tool instead.", "parameters": { "type": "object", "properties": { @@ -127,7 +193,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "edit_document", - "description": "PREFERRED way to change an existing document. Targeted find-and-replace with multiple FIND/REPLACE pairs per call. Use this for any edit smaller than a full rewrite: adding a function, fixing a bug, tweaking a section, renaming things. Do NOT send the whole file back via update_document for small edits — it wastes tokens and is hard to review.", + "description": "Edit a document OPEN IN THE EDITOR PANEL (created via create_document) — NOT a file on disk. 
For files on disk (home folder, project files, anything with a path like ~/x.txt or /path/to/file) use edit_file instead. Targeted find-and-replace with multiple FIND/REPLACE pairs per call; use for any edit smaller than a full rewrite. Do NOT send the whole file back via update_document for small edits.", "parameters": { "type": "object", "properties": { @@ -192,7 +258,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "search_chats", - "description": "Search the user's past chat conversations by keyword. Use when the user asks about previous chats, past conversations, or wants to find a discussion they had before. Returns matching sessions with clickable links.", + "description": "Search the user's past session transcripts by keyword. Use when the user asks about previous chats, past conversations, or when direct transcript evidence is better than persistent memory. Returns matching sessions with clickable links and nearby context.", "parameters": { "type": "object", "properties": { @@ -340,7 +406,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "ui_control", - "description": "Control the user interface. Actions: toggle (turn tools on/off), open_panel (open a modal: documents/library, gallery, email, sessions, notes, memories/brain, skills, settings, cookbook), open_email_reply (open an email reply draft document; does NOT send), set_mode, switch_model, set_theme (presets: dark, light, midnight, paper, nord, monokai, gruvbox, dracula, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, vaporwave, lavender, gpt, coffee, claude), create_theme (CREATE any custom theme with a name + colors object — pick distinctive, evocative hex colors that match the requested aesthetic, NOT generic defaults. The theme auto-applies after creation). When a user asks for ANY theme not in the preset list, ALWAYS use create_theme.", + "description": "Control the user interface. 
Actions: toggle (turn tools on/off), open_panel (open a modal: documents/library, gallery, email, sessions, notes, memories/brain, skills, settings, cookbook), open_email_reply (open an email reply draft document; does NOT send), set_mode, switch_model, set_theme (built-in presets: dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute), create_theme (CREATE any custom theme with a name + colors object — pick distinctive, evocative hex colors that match the requested aesthetic, NOT generic defaults. The theme auto-applies after creation). When a user asks for ANY theme not in the built-in preset list, ALWAYS use create_theme.", "parameters": { "type": "object", "properties": { @@ -381,6 +447,47 @@ FUNCTION_TOOL_SCHEMAS = [ } } }, + { + "type": "function", + "function": { + "name": "ask_user", + "description": "Ask the user a multiple-choice question to get a decision or clarification when the task is genuinely ambiguous and the answer changes what you do next (e.g. pick between approaches, confirm an assumption, choose a target). The user sees clickable option buttons; calling this ENDS your turn and their selection arrives as your next message. Prefer sensible defaults over asking — only ask when you truly cannot proceed well without the user's input. Do NOT use it to confirm irreversible/destructive actions that have a dedicated confirmation flow.", + "parameters": { + "type": "object", + "properties": { + "question": {"type": "string", "description": "The question to ask. Be specific and self-contained."}, + "options": { + "type": "array", + "description": "2-6 mutually exclusive choices. 
Each is an object with a short `label` and an optional `description` explaining the trade-off.", + "items": { + "type": "object", + "properties": { + "label": {"type": "string", "description": "Concise choice text the user clicks (1-5 words)."}, + "description": {"type": "string", "description": "Optional one-line explanation of this choice."} + }, + "required": ["label"] + } + }, + "multi": {"type": "boolean", "description": "Set true to let the user select multiple options instead of one. Default false."} + }, + "required": ["question", "options"] + } + } + }, + { + "type": "function", + "function": { + "name": "update_plan", + "description": "Write back to the ACTIVE PLAN: mark steps done or revise them. Use this while executing an approved plan — after you finish a step, call update_plan with the full checklist and that step marked `- [x]`; when the user asks to change the plan, call it with the revised checklist. The user's docked plan window updates live. Pass the COMPLETE checklist every time (not a diff). No effect if there is no active plan.", + "parameters": { + "type": "object", + "properties": { + "plan": {"type": "string", "description": "The full updated plan as a GitHub-style markdown checklist — one step per line, `- [ ]` for pending and `- [x]` for done. 
Always send the whole list."} + }, + "required": ["plan"] + } + } + }, { "type": "function", "function": { @@ -399,7 +506,7 @@ FUNCTION_TOOL_SCHEMAS = [ "action_name": {"type": "string", "enum": [ "tidy_sessions", "tidy_documents", "consolidate_memory", "tidy_research", "summarize_emails", "draft_email_replies", "extract_email_events", - "classify_events", "mark_email_boundaries", "learn_sender_signatures", + "classify_events", "learn_sender_signatures", "test_skills", "audit_skills", "check_email_urgency" ], "description": "Built-in action (for task_type=action)"}, @@ -422,7 +529,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "manage_calendar", - "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Use ISO 8601 datetimes; for all-day events set all_day=true and pass YYYY-MM-DD. For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.", + "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Resolve relative dates like today/tomorrow against the 'Current date and time' system context, then pass ISO 8601 datetimes in the user's local wall time; for all-day events set all_day=true and pass YYYY-MM-DD. 
For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.", "parameters": { "type": "object", "properties": { @@ -438,11 +545,47 @@ FUNCTION_TOOL_SCHEMAS = [ "uid": {"type": "string", "description": "Event UID (for update/delete)"}, "calendar_href": {"type": "string", "description": "Specific calendar URL (optional; defaults to first calendar)"}, "calendar": {"type": "string", "description": "Filter list_events by calendar name or href"}, - "start": {"type": "string", "description": "list_events range start (ISO datetime); defaults to today"}, - "end": {"type": "string", "description": "list_events range end (ISO datetime); defaults to +14 days"}, + "start": {"type": "string", "description": "list_events range start (ISO datetime); defaults to today. Prefer start; backend also accepts start_date, range_start, from, dtstart, since."}, + "end": {"type": "string", "description": "list_events range end (ISO datetime); defaults to +14 days. Prefer end; backend also accepts end_date, range_end, to, dtend, until."}, "event_type": {"type": "string", "description": "Tag / category for the event. Common values: work, personal, health, travel, meal, social, admin, other. Aliases accepted: tag, category, type."}, "importance": {"type": "string", "enum": ["low", "normal", "high", "critical"], "description": "Priority level (defaults to 'normal')"}, - "reminder_minutes": {"type": "integer", "description": "For create_event: create an Odysseus reminder this many minutes before the event, e.g. 5 for 'reminder 5 min before'."} + "reminder_minutes": {"type": "integer", "description": "For create_event: create an Odysseus reminder this many minutes before the event, e.g. 5 for 'reminder 5 min before'."}, + "rrule": {"type": "string", "description": "Recurrence rule in iCalendar RRULE format, e.g. 'FREQ=WEEKLY;BYDAY=MO' for weekly on Monday. 
Use with create_event or update_event."} + }, + "required": ["action"] + } + } + }, + { + "type": "function", + "function": { + "name": "manage_notes", + "description": "Manage notes and checklists (Google Keep-style): list, add, update, delete, toggle_item. IMPORTANT: For to-do lists / checklists, set note_type='checklist' and pass the items as the `checklist_items` array — do NOT serialize them into `content` as plain text. For freeform notes, use note_type='note' and put the body in `content`. `due_date` accepts natural language like 'tomorrow at 9am' (parsed in the user's timezone) and fires a notification — do not also create a calendar event for the same reminder.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string", + "enum": ["list", "add", "update", "delete", "toggle_item"], + "description": "The action to perform"}, + "id": {"type": "string", "description": "Note id (for update/delete/toggle_item); 8-char prefix is fine"}, + "title": {"type": "string", "description": "Note title (for add/update)"}, + "content": {"type": "string", "description": "Freeform body text. Use this for note_type='note'. Do NOT use this for checklists — pass `checklist_items` instead."}, + "note_type": {"type": "string", "enum": ["note", "checklist"], + "description": "'note' = freeform text in `content`. 'checklist' = structured to-do items in `checklist_items`. Defaults to 'checklist' if checklist_items is supplied, else 'note'."}, + "checklist_items": {"type": "array", + "items": {"type": "object", + "properties": { + "text": {"type": "string", "description": "The to-do item text"}, + "done": {"type": "boolean", "description": "Whether the item is checked off"} + }, + "required": ["text"]}, + "description": "Checklist items for note_type='checklist'. Each item is {text, done}. REQUIRED for checklists — leaving this empty produces a blank note."}, + "color": {"type": "string", "description": "Optional color label (e.g. 
'yellow', 'blue', 'green')"}, + "label": {"type": "string", "description": "Optional category label (also used as a list filter)"}, + "pinned": {"type": "boolean", "description": "Pin the note to the top"}, + "archived": {"type": "boolean", "description": "For update: archive/unarchive. For list: show archived notes when true."}, + "due_date": {"type": "string", "description": "Reminder time. Accepts natural language ('tomorrow at 9am', '11pm today') or ISO 8601. Fires a notification at that time."}, + "index": {"type": "integer", "description": "Checklist item index (for toggle_item, 0-based)"} }, "required": ["action"] } @@ -685,6 +828,21 @@ FUNCTION_TOOL_SCHEMAS = [ } } }, + { + "type": "function", + "function": { + "name": "tail_serve_output", + "description": "Read the last N lines of a cookbook serve/download task's tmux pane. Use ONLY in this exact sequence: (1) the user asked to serve a model, (2) you launched it via serve_model, (3) list_served_models reports the NEW task as crashed/error, (4) call tail_serve_output on the new sessionId to find the root cause, (5) call serve_model again with adjusted flags. DO NOT call this on old stopped/completed download tasks — they are historical and won't tell you anything about the current attempt. DO NOT investigate past failures before launching; the environment may have changed since.", + "parameters": { + "type": "object", + "properties": { + "session_id": {"type": "string", "description": "Tmux session id from list_served_models (e.g. 'serve-abc12345', 'cookbook-a1b2c3d4')."}, + "tail": {"type": "integer", "description": "How many lines of pane scrollback to fetch (default 300, max 4000). 
Bump this if the error in the visible tail references an earlier line ('see root cause above')."}, + }, + "required": ["session_id"] + } + } + }, { "type": "function", "function": { @@ -792,7 +950,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "app_api", - "description": "Generic loopback to ANY internal Odysseus endpoint. Use this when there's no named tool for what the user wants. Hits the same routes the UI buttons hit (cookbook, gallery, library/documents, memory, notes, calendar, tasks, settings, themes, research, compare, etc.). action='endpoints' returns the OpenAPI surface (use `filter` to narrow). action='call' (default) takes method+path+body. Auth/user/admin paths are blocked for safety. Do not use for email account discovery; use list_email_accounts instead because /api/email/accounts is owner-filtered in tool context.", + "description": "Generic loopback to allowed internal Odysseus endpoints. Use this when there's no named tool for what the user wants. Hits the same routes the UI buttons hit (cookbook, gallery, library/documents, memory, notes, calendar, tasks, settings, themes, research, compare, etc.). action='endpoints' returns the OpenAPI surface (use `filter` to narrow). action='call' (default) takes method+path+body. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked for safety. Do not use for shell commands; use named command tooling instead. Do not use for package installs, engine rebuilds, PID signalling, or email account discovery; use list_email_accounts for email accounts because /api/email/accounts is owner-filtered in tool context.", "parameters": { "type": "object", "properties": { @@ -1038,7 +1196,16 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock logger.error(f"Failed to parse function call arguments for {name}: {arguments}") return None + # Some models emit valid JSON that isn't an object (e.g. 
a bare array + # ["ls -la"], string, or number) as the function arguments. Every branch + # below assumes a dict and calls args.get(...), so a non-dict would raise + # AttributeError and abort the whole agent stream. Coerce to {} instead. + if not isinstance(args, dict): + logger.warning(f"Non-object function call arguments for {name}: {args!r}; treating as empty") + args = {} + tool_type = _TOOL_NAME_MAP.get(name, name) + # Allow MCP tools through (namespaced as mcp__serverid__toolname) if tool_type.startswith("mcp__"): content = json.dumps(args) if args else "{}" @@ -1058,11 +1225,31 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock elif tool_type == "python": content = args.get("code", "") elif tool_type == "web_search": - content = args.get("query", "") + queries = args.get("queries") + if isinstance(queries, list) and queries: + content = str(queries[0]) + elif queries: + content = str(queries) + else: + content = args.get("query", "") + # Preserve the model-requested freshness filter — the web_search schema + # advertises time_filter and the executor parses {"query","time_filter"}, + # but a bare query string dropped it. Mirrors the read_file JSON idiom. + tf = args.get("time_filter") + if content and isinstance(tf, str) and tf in ("day", "week", "month", "year"): + content = json.dumps({"query": content, "time_filter": tf}) elif tool_type == "read_file": - content = args.get("path", "") + # Plain path (back-compat) unless a line range is requested → JSON. 
+ if args.get("offset") or args.get("limit"): + content = json.dumps(args) + else: + content = args.get("path", "") + elif tool_type in ("grep", "glob", "ls"): + content = json.dumps(args) if args else "{}" elif tool_type == "write_file": content = args.get("path", "") + "\n" + args.get("content", "") + elif tool_type == "edit_file": + content = json.dumps(args) elif tool_type == "create_document": parts = [args.get("title", "Untitled")] if args.get("language"): @@ -1071,14 +1258,24 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock content = "\n".join(parts) elif tool_type == "edit_document": blocks = [] - for edit in args.get("edits", []): + edits = args.get("edits", []) + if not isinstance(edits, list): + edits = [] + for edit in edits: + if not isinstance(edit, dict): + continue blocks.append( f'<<>>\n{edit.get("find", "")}\n<<>>\n{edit.get("replace", "")}\n<<>>' ) content = "\n".join(blocks) elif tool_type == "suggest_document": blocks = [] - for s in args.get("suggestions", []): + suggestions = args.get("suggestions", []) + if not isinstance(suggestions, list): + suggestions = [] + for s in suggestions: + if not isinstance(s, dict): + continue blocks.append( f'<<>>\n{s.get("find", "")}\n<<>>\n{s.get("replace", "")}\n<<>>\n{s.get("reason", "")}\n<<>>' ) diff --git a/src/tool_security.py b/src/tool_security.py index eea95426b..82d2c3d67 100644 --- a/src/tool_security.py +++ b/src/tool_security.py @@ -16,6 +16,10 @@ NON_ADMIN_BLOCKED_TOOLS = { "python", "read_file", "write_file", + "edit_file", + "grep", + "glob", + "ls", "search_chats", "manage_memory", "manage_skills", @@ -40,16 +44,120 @@ NON_ADMIN_BLOCKED_TOOLS = { "vault_unlock", "download_model", "serve_model", + "serve_preset", "stop_served_model", "cancel_download", "adopt_served_model", } +# Plan mode: the agent may investigate but must not mutate anything. 
Only these +# read-only/inspection tools stay enabled; everything else (writes, sends, +# manage_*, model serving, MCP, etc.) is blocked. Allowlist rather than blocklist +# so any newly added tool defaults to BLOCKED in plan mode — fail safe. +# +# bash/python are deliberately NOT here: the shell can mutate (write files, hit +# the network) and can't be constrained to read-only at the tool layer, so plan +# mode blocks it outright rather than relying on a prompt to keep it well-behaved. +# Code/file discovery is covered by the dedicated read-only tools below +# (read_file, grep, glob, ls) instead of freestyle shell. +PLAN_MODE_READONLY_TOOLS = { + "read_file", + "grep", + "glob", + "ls", + "web_search", + "web_fetch", + "search_chats", + "list_models", + "list_sessions", + "list_emails", + "read_email", + "list_served_models", + "list_downloads", + "list_cached_models", + "search_hf_models", + "list_serve_presets", + "list_cookbook_servers", + "resolve_contact", + "chat_with_model", + "ask_teacher", +} + + +# The agent's tool gate is a DENYLIST: execute_tool_block blocks any tool whose +# name is in `disabled_tools`. Plan mode's policy is the opposite — an allowlist +# (PLAN_MODE_READONLY_TOOLS). To apply an allowlist through a denylist, plan mode +# returns the inverse: every known tool name minus the allowlist. +# +# Known tool names come from FUNCTION_TOOL_SCHEMAS, but that source is imperfect: +# some tools are only XML-invocable (e.g. manage_notes, generate_image) and never +# appear there, and the import can fail outright. Either gap would drop a mutating +# tool from the subtraction and silently leave it enabled. This set is the static +# backstop for both: union it in so known mutators are always subtracted, and so a +# failed import still blocks them (fail closed, never open). Only mutators belong +# here — read-only tools are covered by the allowlist. Keep in sync when adding +# new mutating tools. 
+_PLAN_MODE_KNOWN_MUTATORS = { + "write_file", "create_document", "edit_document", "update_document", + "suggest_document", "manage_documents", "create_session", "manage_session", + "send_to_session", "pipeline", "manage_memory", "manage_skills", + "manage_tasks", "manage_notes", "manage_endpoints", "manage_mcp", + "manage_webhooks", "manage_tokens", "manage_settings", "manage_contact", + "manage_calendar", "api_call", "app_api", "ui_control", + "send_email", "reply_to_email", "bulk_email", "delete_email", + "archive_email", "mark_email_read", "download_model", "serve_model", + "stop_served_model", "cancel_download", "adopt_served_model", "serve_preset", + "generate_image", "edit_image", "trigger_research", "manage_research", + # Shell is never read-only-safe; block it explicitly so it stays out of plan + # mode even if the schema list fails to load. + "bash", "python", +} + + +def plan_mode_disabled_tools() -> Set[str]: + """Tool names to add to the denylist in plan mode. + + Plan mode allows only PLAN_MODE_READONLY_TOOLS. The gate is a denylist, so + return the inverse: every known tool name minus the allowlist. Known names + come from the function-tool schemas, backstopped by _PLAN_MODE_KNOWN_MUTATORS + (see above) so XML-only tools and a failed schema import can't leave a mutator + enabled. MCP tools are handled separately — the loop drops the MCP manager + entirely in plan mode.""" + try: + # agent_tools / tool_parsing / tool_schemas form a mutually-circular + # cluster that only resolves cleanly when entered via agent_tools. + # Import it first so the lazy schema import works even from a cold + # import (e.g. tests) — not just after the app has wired everything up. 
+ import src.agent_tools # noqa: F401 + from src.tool_schemas import FUNCTION_TOOL_SCHEMAS + + all_names = { + (t.get("function") or {}).get("name") + for t in FUNCTION_TOOL_SCHEMAS + } + all_names.discard(None) + except Exception as exc: + logger.warning("Unable to load tool schemas for plan-mode gating: %s", exc) + all_names = set() + # Subtract the allowlist from all known tool names (schema-derived plus the + # static mutator backstop). Fail closed: if the schema import failed above, + # the backstop alone still blocks known mutators. + return (all_names | _PLAN_MODE_KNOWN_MUTATORS) - PLAN_MODE_READONLY_TOOLS + + def is_public_blocked_tool(tool_name: Optional[str]) -> bool: - """Return True when a non-admin/public user must not execute this tool.""" - if not tool_name: + """Return True when a non-admin/public user must not execute this tool. + + This is a security gate, so it fails CLOSED: a malformed non-string tool + name can't be matched against the blocklist or the ``mcp__`` namespace, so + it is treated as blocked rather than silently allowed through. ``None`` / + empty string means there is no tool to gate. + """ + if tool_name is None or tool_name == "": return False + if not isinstance(tool_name, str): + return True return tool_name in NON_ADMIN_BLOCKED_TOOLS or tool_name.startswith("mcp__") diff --git a/src/tool_utils.py b/src/tool_utils.py new file mode 100644 index 000000000..cf71e78c5 --- /dev/null +++ b/src/tool_utils.py @@ -0,0 +1,39 @@ +""" +This module intentionally imports NOTHING from the project (except +src.constants which imports nothing from src). Adding a project import here +will reintroduce the circular dependency that this module exists to break. 
+""" + +from src.constants import MAX_OUTPUT_CHARS + +_mcp_manager = None + +# --------------------------------------------------------------------------- +# MCP Manager singleton +# --------------------------------------------------------------------------- + +def set_mcp_manager(manager): + """Set the global MCP manager instance.""" + global _mcp_manager + _mcp_manager = manager + +def get_mcp_manager(): + """Get the global MCP manager instance.""" + return _mcp_manager + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- +def _truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: + """ + Truncate text to *limit* characters with a suffix note. + + Callers treat the result as text, so always return a string: coerce a + non-string (None -> "", otherwise str(...)) instead of returning it raw, + which would just move the crash downstream. + """ + if not isinstance(text, str): + text = "" if text is None else str(text) + if len(text) > limit: + return text[:limit] + f"\n... (truncated, {len(text)} chars total)" + return text diff --git a/src/topic_analyzer.py b/src/topic_analyzer.py index 0f1dae8db..4509baf84 100644 --- a/src/topic_analyzer.py +++ b/src/topic_analyzer.py @@ -23,22 +23,41 @@ def analyze_topics(session_manager, owner: str = None) -> Dict[str, Any]: Scan non-archived sessions and return topic frequency data. If owner is set, only include sessions belonging to that user. + When `owner` is None or empty the helper returns an empty result. The + unauthenticated-loopback path in `app.py` produces a None owner, and + silently aggregating topic frequencies in that case is a cross-tenant + data leak. Callers that want a system-wide aggregate must pass an + explicit `owner` string (e.g. a documented "admin" pseudo-owner) or + the route must reject the request with 401. + Returns dict with "topics" list and "total_topics" count. 
""" + if not owner: + return {"topics": [], "total_topics": 0} + topic_counts: Dict[str, int] = {t: 0 for t in TOPIC_KEYWORDS} topic_matches: Dict[str, list] = {t: [] for t in TOPIC_KEYWORDS} for session_id, session_data in session_manager.sessions.items(): if session_data.get("archived", False): continue - # SECURITY: strict ownership — the previous predicate let any - # null-owner session feed into another user's topic analysis. - if owner: - sess_owner = session_data.get("owner") or getattr(session_data, "owner", None) - if sess_owner != owner: - continue + # Strict ownership: any session whose owner does not match the + # caller is excluded. Ownerless sessions are never included + # unless the caller is itself ownerless (which the early return + # above already prevents). + sess_owner = session_data.get("owner") or getattr(session_data, "owner", None) + if sess_owner != owner: + continue - for msg in session_data.get("history", []): + # Hydrate session to load history from DB if needed + if hasattr(session_manager, "get_session"): + hydrated_session = session_manager.get_session(session_id) + history = hydrated_session.history + else: + hydrated_session = session_data + history = session_data.get("history", []) + + for msg in history: content_raw = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None) if not content_raw: continue @@ -49,11 +68,11 @@ def analyze_topics(session_manager, owner: str = None) -> Dict[str, Any]: for topic, keywords in TOPIC_KEYWORDS.items(): for kw in keywords: - if kw in content: + if re.search(rf"\b{re.escape(kw)}\b", content): topic_counts[topic] += 1 sentences = re.split(r'[.!?]', str(content_raw)) for sentence in sentences: - if kw in sentence.lower(): + if re.search(rf"\b{re.escape(kw)}\b", sentence.lower()): topic_matches[topic].append({ "session_id": session_id, "session_name": session_name, diff --git a/src/upload_handler.py b/src/upload_handler.py index 9dce6983c..95bce306d 100644 --- 
a/src/upload_handler.py +++ b/src/upload_handler.py @@ -6,10 +6,16 @@ import uuid import time import hashlib import mimetypes +import shutil +import tempfile import threading from datetime import datetime, timedelta from typing import Dict, Any, Optional from fastapi import HTTPException, UploadFile + +from src.upload_limits import format_byte_limit, get_chat_upload_max_bytes + + def secure_filename(filename: str) -> str: """Sanitize a filename (replaces werkzeug.utils.secure_filename).""" import unicodedata @@ -29,14 +35,58 @@ import logging logger = logging.getLogger(__name__) +# The extension is optional: save_upload builds the id as `{uuid.hex}{ext}`, +# and a file with no extension (Dockerfile, README, ...) yields a bare 32-hex +# id. Requiring `.ext` made those ids fail validation, so the stored file +# could never be resolved or downloaded again. +UPLOAD_ID_RE = re.compile(r"^[0-9a-fA-F]{32}(?:\.[A-Za-z0-9]+)?$") + + +def is_valid_upload_id(upload_id: str) -> bool: + """Return True when *upload_id* matches the canonical uploads.json id format.""" + return UPLOAD_ID_RE.fullmatch(upload_id or "") is not None + + +def _build_upload_id(safe_filename: str) -> str: + """Build a unique upload id whose extension matches UPLOAD_ID_RE. + + secure_filename keeps '_' and '-', so an extension like '.jpg-1' (the + suffix browsers append to duplicate downloads) or '.v1_final' produced an + id that failed is_valid_upload_id, making the saved file permanently + unreadable (every read path gates on validate_upload_id). Sanitize the + extension to the single-alnum shape the id contract requires. + """ + _, ext = os.path.splitext(safe_filename or "") + ext = re.sub(r"[^A-Za-z0-9]", "", ext) + return uuid.uuid4().hex + (("." + ext) if ext else "") + + +def count_recent_uploads(timestamps, now: float, window: float = 10.0) -> int: + """Number of upload events in *timestamps* within the last *window* seconds. + + Used by the per-IP concurrency guard. 
The count is of genuine prior upload + events — it must NOT scale with how many files are in the *current* request, + or a single multi-file batch would reject itself (issue #1346).""" + if not timestamps: + return 0 + cutoff = now - window + return sum(1 for t in timestamps if t > cutoff) + + class UploadHandler: def __init__(self, base_dir: str, upload_dir: str): self.base_dir = base_dir self.upload_dir = upload_dir - self.max_upload_size = 10 * 1024 * 1024 # 10MB + self.max_upload_size = get_chat_upload_max_bytes() self.max_concurrent_uploads = 3 self.cleanup_days = 30 - self.upload_rate_limit = 5 # Max 5 uploads per minute per IP + # Per-IP per-minute cap. save_upload() counts EACH file, and the chat + # composer lets a user attach up to MAX_FILES (10, static/js/fileHandler.js) + # in one batch — so this must comfortably exceed 10, or a single 6+ file + # attach is rejected mid-batch (issue #1346: "5 work, 6 fail"). Burst abuse + # is separately bounded by max_concurrent_uploads. Headroom for a few full + # batches per minute. + self.upload_rate_limit = 60 # max 60 file-uploads per minute per IP self.upload_rate_window = 60 # 60 seconds # Track upload rates @@ -44,6 +94,13 @@ class UploadHandler: self._upload_rate_lock = threading.Lock() self._upload_rate_counter = 0 self._upload_rate_max_entries = 1000 + # Serialise the read-modify-write of uploads.json within one + # Python process. Scope: single FastAPI worker (the default + # uvicorn deployment). Cross-process / multi-worker deployments + # need an additional file-level lock (flock) or a database; + # the atomic-rename write below keeps on-disk state consistent + # on its own but does not serialise writers across processes. 
+ self._index_lock = threading.Lock() # Create upload directory os.makedirs(self.upload_dir, exist_ok=True) @@ -120,14 +177,19 @@ class UploadHandler: def is_document_file(self, filename: str, content_type: str = None) -> bool: """Check if a file is a document based on extension or content type.""" document_extensions = { - '.pdf', '.docx', '.txt', '.py', '.js', '.html', '.htm', - '.css', '.json', '.md', '.csv', '.log', '.xml', '.yml', - '.yaml', '.sql', '.sh', '.bash', '.c', '.cpp', '.h', + '.pdf', '.docx', '.xlsx', '.pptx', '.xls', '.epub', + '.txt', '.py', '.js', '.html', '.htm', + '.css', '.json', '.md', '.csv', '.log', '.xml', '.yml', + '.yaml', '.nix', '.sql', '.sh', '.bash', '.c', '.cpp', '.h', '.java', '.go', '.rs', '.php', '.rb', '.ts', '.jsx', '.tsx' } document_mime_types = { 'application/pdf', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'application/vnd.ms-excel', + 'application/epub+zip', 'text/plain' } @@ -223,8 +285,7 @@ class UploadHandler: def validate_upload_id(self, upload_id: str) -> bool: """Validate that the upload ID matches the expected pattern.""" - pattern = r'^[0-9a-fA-F]{32}\.[A-Za-z0-9]+$' - return re.fullmatch(pattern, upload_id) is not None + return is_valid_upload_id(upload_id) def _inside_upload_dir(self, path: str) -> bool: """Check if path is inside the upload directory.""" @@ -235,17 +296,52 @@ class UploadHandler: except Exception: return False + def _atomic_write_json(self, path: str, data: dict) -> None: + """Write `data` to `path` atomically: write to a temp file in the + same directory, then `os.replace` onto the target. The kernel + guarantees `os.replace` is atomic on POSIX, so a reader either + sees the old contents or the new contents, never a half-written + file. Also keeps a `.bak` sibling of the previous good state. 
+ """ + directory = os.path.dirname(path) or "." + fd, tmp = tempfile.mkstemp(prefix=".uploads-", suffix=".tmp", dir=directory) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + json.dump(data, f, indent=2) + f.flush() + os.fsync(f.fileno()) + if os.path.exists(path): + bak = path + ".bak" + try: + shutil.copy2(path, bak) + except OSError: + pass + os.replace(tmp, path) + except Exception: + try: + os.unlink(tmp) + except OSError: + pass + raise + def _load_upload_index(self) -> Dict[str, Any]: uploads_db_path = os.path.join(self.upload_dir, "uploads.json") if not os.path.exists(uploads_db_path): return {} - try: - with open(uploads_db_path, "r") as f: - data = json.load(f) - return data if isinstance(data, dict) else {} - except Exception as e: - logger.warning(f"Failed to read uploads database: {e}") - return {} + # Try the live file first, fall back to the .bak sibling if the + # live file is truncated/corrupted (e.g. a previous writer was + # SIGKILL'd mid-rename before the new code path was deployed). 
+ for candidate in (uploads_db_path, uploads_db_path + ".bak"): + if not os.path.exists(candidate): + continue + try: + with open(candidate, "r", encoding="utf-8") as f: + data = json.load(f) + return data if isinstance(data, dict) else {} + except Exception as e: + logger.warning(f"Failed to read uploads database ({candidate}): {e}") + continue + return {} def get_upload_info(self, upload_id: str) -> Optional[Dict[str, Any]]: """Return the uploads.json metadata row for an upload ID, if present.""" @@ -426,7 +522,7 @@ class UploadHandler: if file_size > self.max_upload_size: raise HTTPException( status_code=400, - detail=f"File size exceeds {self.max_upload_size/1024/1024}MB limit" + detail=f"File size exceeds {format_byte_limit(self.max_upload_size)} limit" ) # Get original filename and sanitize it @@ -446,56 +542,79 @@ class UploadHandler: # Calculate file hash for deduplication file_hash = self.calculate_file_hash(file_obj) - # Check for duplicate files + # Check for duplicate files. + # The duplicate-detection lookup AND the write must both happen + # under _index_lock: a duplicate upload racing with a new-entry + # insert must not overwrite a newer snapshot of the index with + # the stale one read before the insert. uploads_db_path = os.path.join(self.upload_dir, "uploads.json") - existing_files = {} - - if os.path.exists(uploads_db_path): - try: - with open(uploads_db_path, "r", encoding="utf-8") as f: - existing_files = json.load(f) - except Exception as e: - logger.warning(f"Failed to read uploads database: {e}") - - # Check if this hash already exists for the same owner. Uploads are - # access-controlled by owner, so cross-user dedupe must not return a - # shared file ID. 
- existing_key = None existing_file = None - for key, info in existing_files.items(): - if info.get("hash") == file_hash and info.get("owner") == owner: - existing_key = key - existing_file = info - break + existing_key = None + with self._index_lock: + existing_files = self._load_upload_index() + stale_keys = [] + for key, info in existing_files.items(): + if info.get("hash") == file_hash and info.get("owner") == owner: + stored_path = info.get("path") + if stored_path and os.path.exists(stored_path) and self._inside_upload_dir(stored_path): + existing_key = key + existing_file = info + break + stale_keys.append(key) + if stale_keys: + for key in stale_keys: + existing_files.pop(key, None) + try: + self._atomic_write_json(uploads_db_path, existing_files) + logger.info("Removed %d stale upload index entries for missing duplicates", len(stale_keys)) + except Exception as e: + logger.warning(f"Failed to remove stale upload index entries: {e}") if existing_file: logger.info(f"Duplicate file upload detected: {original_filename} -> {existing_file['id']}") - + existing_file["last_accessed"] = datetime.now().isoformat() - existing_files[existing_key] = existing_file - - try: - with open(uploads_db_path, "w", encoding="utf-8") as f: - json.dump(existing_files, f, indent=2) - except Exception as e: - logger.warning(f"Failed to update uploads database: {e}") - - return { - "id": existing_file["id"], - "path": existing_file["path"], - "mime": existing_file["mime"], - "size": existing_file["size"], - "name": existing_file["original_name"], - "hash": file_hash, - "uploaded_at": existing_file["uploaded_at"], - "owner": existing_file.get("owner"), - "width": existing_file.get("width"), - "height": existing_file.get("height"), - "is_duplicate": True - } + with self._index_lock: + try: + current = self._load_upload_index() + # Re-resolve the key inside the lock: a concurrent + # insert can have changed the dict's keys. 
+ live_key = existing_key + if live_key not in current: + for k, v in current.items(): + if v.get("hash") == file_hash and v.get("owner") == owner: + live_key = k + existing_file = v + break + if live_key is None: + # No matching entry anymore (e.g. cleaned up between + # the outer read and the write). Fall through to the + # fresh-insert path below; release the lock first. + raise LookupError("upload entry vanished mid-dedupe") + existing_file["last_accessed"] = datetime.now().isoformat() + current[live_key] = existing_file + self._atomic_write_json(uploads_db_path, current) + except LookupError: + existing_file = None + except Exception as e: + logger.warning(f"Failed to update uploads database: {e}") + + if existing_file: + return { + "id": existing_file["id"], + "path": existing_file["path"], + "mime": existing_file["mime"], + "size": existing_file["size"], + "name": existing_file["original_name"], + "hash": file_hash, + "uploaded_at": existing_file["uploaded_at"], + "owner": existing_file.get("owner"), + "width": existing_file.get("width"), + "height": existing_file.get("height"), + "is_duplicate": True + } # Generate unique ID and determine save location - _, ext = os.path.splitext(safe_filename) - file_id = f"{uuid.uuid4().hex}{ext}" + file_id = _build_upload_id(safe_filename) # Create date-based directory structure upload_dir = self.get_upload_dir() @@ -536,24 +655,14 @@ class UploadHandler: logger.warning(f"Failed to read image dimensions for {file_id}: {e}") # Update uploads database - try: - if os.path.exists(uploads_db_path): - try: - with open(uploads_db_path, "r", encoding="utf-8") as f: - all_files = json.load(f) - except Exception: - all_files = {} - else: - all_files = {} - - storage_key = f"{owner}:{file_hash}" if owner else file_hash - all_files[storage_key] = file_metadata - - with open(uploads_db_path, "w", encoding="utf-8") as f: - json.dump(all_files, f, indent=2) - - except Exception as e: - logger.warning(f"Failed to update uploads 
database: {e}") + with self._index_lock: + try: + current = self._load_upload_index() if os.path.exists(uploads_db_path) else {} + storage_key = f"{owner}:{file_hash}" if owner else file_hash + current[storage_key] = file_metadata + self._atomic_write_json(uploads_db_path, current) + except Exception as e: + logger.warning(f"Failed to update uploads database: {e}") logger.info(f"File uploaded successfully: {original_filename} ({file_size} bytes)") return file_metadata diff --git a/src/upload_limits.py b/src/upload_limits.py new file mode 100644 index 000000000..2be42077b --- /dev/null +++ b/src/upload_limits.py @@ -0,0 +1,72 @@ +"""Small helpers for route-local upload size caps.""" + +import os + +from fastapi import HTTPException, UploadFile + +DEFAULT_CHAT_UPLOAD_MAX_BYTES = 10 * 1024 * 1024 +CHAT_UPLOAD_MAX_BYTES_ENV = "ODYSSEUS_CHAT_UPLOAD_MAX_BYTES" + + +def format_byte_limit(limit: int) -> str: + if limit % (1024 * 1024) == 0: + return f"{limit // (1024 * 1024)} MB" + if limit % 1024 == 0: + return f"{limit // 1024} KB" + return f"{limit} bytes" + + +def read_byte_limit_env(name: str, default: int) -> int: + raw = os.getenv(name) + if raw is None or not raw.strip(): + return default + try: + limit = int(raw) + except ValueError as exc: + raise ValueError(f"{name} must be an integer byte count") from exc + if limit < 1: + raise ValueError(f"{name} must be greater than 0") + return limit + + +def get_chat_upload_max_bytes() -> int: + return read_byte_limit_env(CHAT_UPLOAD_MAX_BYTES_ENV, DEFAULT_CHAT_UPLOAD_MAX_BYTES) + + +# Per-route upload byte-limits, single-sourced here (issue #3364). Each is +# validated + env-overridable via read_byte_limit_env: set the matching +# ODYSSEUS_*_MAX_BYTES env var to an integer byte count to tune it; an invalid +# value fails fast at import rather than crashing mid-request. Defaults match +# the prior per-route values, so behavior is unchanged unless an env var is set. 
+GALLERY_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", 100 * 1024 * 1024 +) +GALLERY_TRANSFORM_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +MEMORY_IMPORT_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", 10 * 1024 * 1024 +) +PERSONAL_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +EMAIL_COMPOSE_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +STT_MAX_AUDIO_BYTES = read_byte_limit_env( + "ODYSSEUS_STT_MAX_AUDIO_BYTES", 25 * 1024 * 1024 +) +ICS_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_ICS_MAX_BYTES", 10 * 1024 * 1024 +) + + +async def read_upload_limited(upload: UploadFile, limit: int, label: str = "Upload") -> bytes: + """Read an UploadFile with a hard byte cap.""" + data = await upload.read(limit + 1) + if len(data) > limit: + raise HTTPException( + status_code=413, + detail=f"{label} exceeds {format_byte_limit(limit)} limit", + ) + return data diff --git a/src/url_safety.py b/src/url_safety.py new file mode 100644 index 000000000..cc681703a --- /dev/null +++ b/src/url_safety.py @@ -0,0 +1,90 @@ +"""Outbound URL safety checks (SSRF hardening). + +Run before the server makes a request to a *user-supplied* URL — e.g. the custom +embedding endpoint set via ``POST /api/embeddings/endpoint``, which then triggers +an outbound ``httpx`` call. + +Odysseus is local-first: pointing the embedding endpoint at a loopback or LAN +address (a local vLLM / llama.cpp / Ollama server) is a normal, intended setup. +So this guard does **not** blanket-block private addresses by default — that would +break the primary use case. What it *always* rejects: + + - a non-HTTP(S) scheme (``file://``, ``gopher://``, ``ftp://`` …), and + - the link-local range (``169.254.0.0/16`` / ``fe80::/10``), i.e. 
the cloud + instance-metadata SSRF credential-exfil vector — nobody serves embeddings + there — plus multicast / reserved / unspecified addresses. + +For exposed multi-tenant deployments, set ``EMBEDDING_BLOCK_PRIVATE_IPS=true`` to +additionally reject all private and loopback targets (full SSRF lockdown). +""" + +import ipaddress +import socket +from typing import Callable, List, Optional, Tuple +from urllib.parse import urlparse + +ALLOWED_SCHEMES = ("http", "https") + + +def _default_resolver(host: str) -> List[str]: + """Resolve a hostname to the list of IP strings it maps to (A + AAAA).""" + return [info[4][0] for info in socket.getaddrinfo(host, None)] + + +def _classify(ip: ipaddress._BaseAddress, *, block_private: bool) -> Optional[str]: + """Return a rejection reason for an IP, or None if it is allowed.""" + # IPv4-mapped IPv6 (e.g. ::ffff:169.254.169.254) — judge the embedded v4. + if isinstance(ip, ipaddress.IPv6Address) and ip.ipv4_mapped is not None: + ip = ip.ipv4_mapped + if ip.is_link_local: + return f"link-local address blocked (SSRF metadata risk): {ip}" + if ip.is_multicast or ip.is_reserved or ip.is_unspecified: + return f"disallowed address: {ip}" + if block_private and (ip.is_private or ip.is_loopback): + return f"private/loopback address blocked: {ip}" + return None + + +def check_outbound_url( + url: str, + *, + block_private: bool = False, + resolver: Optional[Callable[[str], List[str]]] = None, +) -> Tuple[bool, str]: + """Validate a user-supplied outbound URL. + + Returns ``(ok, reason)``. ``ok`` is True only when the URL is safe to fetch. + ``resolver`` is injectable so callers/tests can avoid real DNS. 
+ """ + if not isinstance(url, str): + return False, "URL must be a string" + if not url or not url.strip(): + return False, "URL is required" + try: + parsed = urlparse(url.strip()) + except Exception as e: # pragma: no cover - urlparse is very tolerant + return False, f"unparseable URL: {e}" + + if parsed.scheme.lower() not in ALLOWED_SCHEMES: + return False, f"scheme must be http or https, got '{parsed.scheme or '(none)'}'" + host = parsed.hostname + if not host: + return False, "URL has no host" + + resolve = resolver or _default_resolver + try: + raw_ips = resolve(host) + except Exception as e: + return False, f"host does not resolve: {e}" + if not raw_ips: + return False, "host does not resolve" + + for raw in raw_ips: + try: + ip = ipaddress.ip_address(raw.split("%")[0]) # strip IPv6 zone id + except ValueError: + continue + reason = _classify(ip, block_private=block_private) + if reason: + return False, reason + return True, "ok" diff --git a/src/url_security.py b/src/url_security.py new file mode 100644 index 000000000..8deb04883 --- /dev/null +++ b/src/url_security.py @@ -0,0 +1,94 @@ +"""URL validation helpers for server-side outbound requests.""" + +from __future__ import annotations + +import ipaddress +import socket +from urllib.parse import urlparse + + +_INTERNAL_HOSTNAMES = { + "localhost", + "metadata", + "metadata.google.internal", +} + +_INTERNAL_SUFFIXES = ( + ".localhost", + ".local", + ".internal", + ".lan", + ".intranet", +) + +_BLOCKED_NETWORKS = ( + ipaddress.ip_network("0.0.0.0/8"), + ipaddress.ip_network("10.0.0.0/8"), + ipaddress.ip_network("100.64.0.0/10"), + ipaddress.ip_network("127.0.0.0/8"), + ipaddress.ip_network("169.254.0.0/16"), + ipaddress.ip_network("172.16.0.0/12"), + ipaddress.ip_network("192.168.0.0/16"), + ipaddress.ip_network("::/128"), + ipaddress.ip_network("::1/128"), + ipaddress.ip_network("fc00::/7"), + ipaddress.ip_network("fe80::/10"), +) + + +def _resolve_hostname_ips(hostname: str) -> 
list[ipaddress._BaseAddress]: + ips: list[ipaddress._BaseAddress] = [] + for family, _, _, _, sockaddr in socket.getaddrinfo(hostname, None): + if family in (socket.AF_INET, socket.AF_INET6): + ips.append(ipaddress.ip_address(sockaddr[0])) + return ips + + +def _blocked_ip(addr: ipaddress._BaseAddress) -> bool: + return ( + any(addr in net for net in _BLOCKED_NETWORKS) + or addr.is_private + or addr.is_loopback + or addr.is_link_local + or addr.is_multicast + or addr.is_unspecified + or addr.is_reserved + ) + + +def _host_resolves_publicly(hostname: str) -> bool: + host = hostname.strip().lower() + if host in _INTERNAL_HOSTNAMES or host.endswith(_INTERNAL_SUFFIXES): + return False + try: + return not _blocked_ip(ipaddress.ip_address(host)) + except ValueError: + pass + try: + addrs = _resolve_hostname_ips(host) + except OSError: + return False + return bool(addrs) and all(not _blocked_ip(addr) for addr in addrs) + + +def is_public_http_url(url: str) -> bool: + parsed = urlparse((url or "").strip()) + if parsed.scheme not in ("http", "https") or not parsed.hostname: + return False + return _host_resolves_publicly(parsed.hostname) + + +def validate_public_http_url(url: str, *, max_length: int = 2048) -> str: + """Validate a user/API-token supplied server-side HTTP(S) endpoint. + + This is for untrusted outbound URLs, not admin-created model endpoints + that are intentionally allowed to point at private model providers. DNS + failures fail closed, and DNS checks reduce obvious private-network + targets but do not eliminate every DNS rebinding race by themselves. 
+ """ + cleaned = (url or "").strip() + if len(cleaned) > max_length: + raise ValueError("URL is too long") + if not is_public_http_url(cleaned): + raise ValueError("URL must point to a public HTTP(S) endpoint") + return cleaned diff --git a/src/user_time.py b/src/user_time.py new file mode 100644 index 000000000..44519c0fb --- /dev/null +++ b/src/user_time.py @@ -0,0 +1,138 @@ +"""Per-request user-local time helpers. + +Chat routes set this context from browser headers. Prompt builders and tools +can then resolve relative dates against the user's clock instead of the server. +""" + +from __future__ import annotations + +import re +from contextvars import ContextVar +from datetime import datetime, timedelta, timezone +from typing import Optional + + +_USER_TZ_OFFSET_MIN: ContextVar[Optional[int]] = ContextVar("user_tz_offset_min", default=None) +_USER_TZ_NAME: ContextVar[Optional[str]] = ContextVar("user_tz_name", default=None) + + +def set_user_tz_offset(offset_min) -> None: + """Set the current user's UTC offset in minutes east of UTC.""" + if offset_min in (None, ""): + _USER_TZ_OFFSET_MIN.set(None) + return + try: + value = int(offset_min) + except (TypeError, ValueError): + return + if -14 * 60 <= value <= 14 * 60: + _USER_TZ_OFFSET_MIN.set(value) + + +def get_user_tz_offset() -> Optional[int]: + """Return minutes east of UTC for the current user, if known.""" + return _USER_TZ_OFFSET_MIN.get() + + +def set_user_tz_name(name) -> None: + """Set a safe IANA timezone label for the current request context.""" + if not name: + _USER_TZ_NAME.set(None) + return + first_token = str(name).strip().split()[0] if str(name).strip() else "" + cleaned = re.sub(r"[^A-Za-z0-9_+\-./]", "", first_token)[:80] + _USER_TZ_NAME.set(cleaned or None) + + +def get_user_tz_name() -> Optional[str]: + """Return the current user's browser timezone name, if provided.""" + return _USER_TZ_NAME.get() + + +def clear_user_time_context() -> None: + """Clear user-local time context for tests and 
non-browser entry points.""" + _USER_TZ_OFFSET_MIN.set(None) + _USER_TZ_NAME.set(None) + + +def format_utc_offset(offset_min: Optional[int]) -> str: + """Format minutes east of UTC as +HH:MM or -HH:MM.""" + if offset_min is None: + offset_min = 0 + sign = "+" if offset_min >= 0 else "-" + total = abs(int(offset_min)) + hours, minutes = divmod(total, 60) + return f"{sign}{hours:02d}:{minutes:02d}" + + +def user_timezone() -> timezone: + """Return the best known user timezone as a fixed-offset tzinfo.""" + offset = get_user_tz_offset() + if offset is None: + name = get_user_tz_name() + if name: + try: + from zoneinfo import ZoneInfo + return ZoneInfo(name) + except Exception: + pass + return datetime.now().astimezone().tzinfo or timezone.utc + return timezone(timedelta(minutes=offset)) + + +def now_user_local(now_utc: Optional[datetime] = None) -> datetime: + """Return the current time in the user's timezone.""" + if now_utc is None: + now_utc = datetime.now(timezone.utc) + elif now_utc.tzinfo is None: + now_utc = now_utc.replace(tzinfo=timezone.utc) + return now_utc.astimezone(user_timezone()) + + +def _date_label(dt: datetime) -> str: + return f"{dt.strftime('%A')}, {dt.strftime('%B')} {dt.day}, {dt.year}" + + +def _clock_label(dt: datetime) -> str: + hour = dt.hour % 12 or 12 + return f"{hour}:{dt.minute:02d} {dt.strftime('%p')}" + + +def timezone_label(dt: Optional[datetime] = None) -> str: + """Return a concise display label such as Australia/Brisbane, UTC+10:00.""" + offset = get_user_tz_offset() + if offset is None: + if dt is None: + dt = datetime.now().astimezone() + offset = int((dt.utcoffset() or timedelta()).total_seconds() // 60) + offset_label = f"UTC{format_utc_offset(offset)}" + name = get_user_tz_name() + return f"{name}, {offset_label}" if name else offset_label + + +def current_datetime_prompt(now_utc: Optional[datetime] = None) -> str: + """Build reusable system prompt text for date/time reasoning.""" + if now_utc is None: + utc_now = 
datetime.now(timezone.utc) + elif now_utc.tzinfo is None: + utc_now = now_utc.replace(tzinfo=timezone.utc) + else: + utc_now = now_utc.astimezone(timezone.utc) + + local_now = now_user_local(utc_now) + tomorrow = local_now + timedelta(days=1) + return ( + "## Current date and time\n" + f"Today is {_date_label(local_now)} ({local_now.strftime('%Y-%m-%d')}). " + f"User local time is {_clock_label(local_now)} ({timezone_label(local_now)}); " + f"current UTC time is {utc_now.strftime('%H:%M')}.\n" + f"Tomorrow is {_date_label(tomorrow)} ({tomorrow.strftime('%Y-%m-%d')}) " + "in the user's local timezone.\n" + "Use this for any 'today', 'tomorrow', 'tonight', 'this week', or other " + "relative-date reasoning. Do not ask for an exact date just because the " + "user used a relative date.\n" + "When scheduling calendar events with manage_calendar, pass local ISO " + "datetimes resolved against this user-local date/time.\n" + "When scheduling a task with manage_tasks, scheduled_time is in UTC: " + "convert the user's stated local time using the UTC offset above.\n\n" + ) diff --git a/src/visual_report.py b/src/visual_report.py index 47cc55e19..b15c8001a 100644 --- a/src/visual_report.py +++ b/src/visual_report.py @@ -19,13 +19,33 @@ import re from datetime import datetime from typing import Dict, List, Optional, Tuple +from bs4 import BeautifulSoup + from src.research_utils import strip_thinking from urllib.parse import urlparse import markdown +import nh3 logger = logging.getLogger(__name__) +# Tags/attributes permitted in rendered research-report HTML. Starts from nh3's +# safe defaults (which drop @@ -432,14 +441,14 @@