diff --git a/.env.example b/.env.example index f282880bc..5382c23c7 100644 --- a/.env.example +++ b/.env.example @@ -56,6 +56,13 @@ SEARXNG_INSTANCE=http://localhost:8080 # SQLite database path (default: sqlite:///./data/app.db) # DATABASE_URL=sqlite:///./data/app.db +# ============================================================ +# Data directory +# ============================================================ +# Move everything that lives under data/ - settings, sessions, database, auth, +# cache, uploads, etc. - to another path: +# ODYSSEUS_DATA_DIR=C:\path\to\dir + # ============================================================ # Auth & Security # ============================================================ @@ -112,6 +119,9 @@ SEARXNG_INSTANCE=http://localhost:8080 # Default: http://{LLM_HOST}:11434/v1/embeddings (ollama) # EMBEDDING_URL=http://localhost:11434/v1/embeddings +# Embedding API key (if there's one) +# EMBEDDING_API_KEY=embedding_api_key_here + # Embedding model name (must be available at the endpoint above) # EMBEDDING_MODEL=all-minilm:l6-v2 @@ -144,6 +154,21 @@ SEARXNG_INSTANCE=http://localhost:8080 # if you intentionally want scheduled scripts to run remotely. # ODYSSEUS_SCRIPT_HOST=localhost +# Chat / agent attachment size cap in bytes (default: 10 MB). +# Raise this for local installs that need larger PDFs or text documents. +# Example: 52428800 = 50 MB. +# ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=10485760 + +# Other per-feature upload size caps in bytes. All are validated and optional; +# defaults shown. An invalid value (non-integer or < 1) fails fast at startup. 
+# ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=104857600 # gallery image upload (100 MB) +# ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=26214400 # gallery transform input (25 MB) +# ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=10485760 # memory import file (10 MB) +# ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=26214400 # personal document upload (25 MB) +# ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=26214400 # email compose attachment (25 MB) +# ODYSSEUS_STT_MAX_AUDIO_BYTES=26214400 # speech-to-text audio (25 MB) +# ODYSSEUS_ICS_MAX_BYTES=10485760 # calendar .ics import (10 MB) + # ============================================================ # GPU support (Docker Compose) # ============================================================ diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 67d84b1ff..64f2d7dcf 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -23,7 +23,7 @@ body: required: true - label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).) required: true - - label: I am running the latest code from `main`. + - label: I am running the latest code from the `dev` branch (the default branch you get on clone, where fixes land first) and the bug still reproduces there. Please `git pull` the latest `dev` before filing. 
required: true - type: dropdown diff --git a/.github/scripts/check-pr-description.js b/.github/scripts/check-pr-description.js index 2a06c2b36..f5dabea5d 100644 --- a/.github/scripts/check-pr-description.js +++ b/.github/scripts/check-pr-description.js @@ -103,14 +103,21 @@ module.exports = async ({ github, context, core }) => { async function swapLabel(num, add, remove) { if (await labelExists(add)) { - await github.rest.issues.addLabels({ owner, repo, issue_number: num, labels: [add] }); + try { + await github.rest.issues.addLabels({ owner, repo, issue_number: num, labels: [add] }); + } catch (e) { + // Fail soft on a token that can't write labels so a label permission + // problem never masks the actual description verdict. + if (e.status !== 403) throw e; + core.warning(`Could not add "${add}" — token lacks label write here; skipping.`); + } } else { core.warning(`Label "${add}" does not exist in the repo — skipping. Create it once to enable labelling.`); } try { await github.rest.issues.removeLabel({ owner, repo, issue_number: num, name: remove }); } catch (e) { - if (e.status !== 404 && e.status !== 410) throw e; + if (e.status !== 404 && e.status !== 410 && e.status !== 403) throw e; } } diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3978ef5f7..818495d14 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.11" @@ -31,6 +33,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + persist-credentials: false - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4 with: node-version: "20" @@ -51,10 +55,40 @@ jobs: continue-on-error: true steps: - uses: 
actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + with: + fetch-depth: 0 + persist-credentials: false + + # Detect whether this PR only touches documentation files. + # If so, skip the expensive pytest run while still reporting a passing check. + - name: Check for docs-only changes + id: docs-check + run: | + if [ "${{ github.event_name }}" = "pull_request" ]; then + BASE="${{ github.event.pull_request.base.sha }}" + HEAD="${{ github.event.pull_request.head.sha }}" + else + BASE="${{ github.event.before }}" + HEAD="${{ github.sha }}" + fi + # List all changed files; if every file matches docs/markdown patterns, skip pytest. + changed=$(git diff --name-only "$BASE" "$HEAD" 2>/dev/null || git diff --name-only HEAD~1 HEAD) + non_docs=$(echo "$changed" | grep -Ev '^(docs/|.*\.md$|\.github/[^/]+\.md$)' || true) + if [ -z "$non_docs" ]; then + echo "docs_only=true" >> "$GITHUB_OUTPUT" + echo "Docs-only change detected — skipping pytest." + else + echo "docs_only=false" >> "$GITHUB_OUTPUT" + fi + - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + if: steps.docs-check.outputs.docs_only != 'true' with: python-version: "3.11" cache: pip - run: pip install -r requirements.txt + if: steps.docs-check.outputs.docs_only != 'true' - run: mkdir -p data # sqlite DB lives at ./data/app.db + if: steps.docs-check.outputs.docs_only != 'true' - run: python -m pytest -q + if: steps.docs-check.outputs.docs_only != 'true' diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml new file mode 100644 index 000000000..5e822ab07 --- /dev/null +++ b/.github/workflows/docker-publish.yml @@ -0,0 +1,140 @@ +name: ci / docker publish + +# Build the Odysseus image and publish to GHCR. +# push to main -> :latest, :X.Y.Z (curated release; main is fast-forwarded at releases) +# push to dev -> :dev, :X.Y.Z-dev.SHORTSHA 
(rolling dev + an immutable, traceable pin) +# Multi-arch (linux/amd64 + linux/arm64): each arch builds on its own native +# runner and pushes by digest, then a merge job stitches the digests into one +# manifest list and applies the tags (faster + cleaner than QEMU emulation). +# Registry: ghcr.io/OWNER/REPO. + +on: + push: + branches: [dev, main] + paths-ignore: + - '**.md' + - 'docs/**' + - '.github/ISSUE_TEMPLATE/**' + +concurrency: + group: docker-publish-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +jobs: + build: + name: build (${{ matrix.arch }}) + runs-on: ${{ matrix.runner }} + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + arch: amd64 + runner: ubuntu-latest + - platform: linux/arm64 + arch: arm64 + runner: ubuntu-24.04-arm + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Build and push by digest + id: build + uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0 + with: + context: . 
+ platforms: ${{ matrix.platform }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=gha,scope=${{ matrix.arch }} + cache-to: type=gha,mode=max,scope=${{ matrix.arch }} + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + - name: Upload digest + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: digest-${{ matrix.arch }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + name: merge manifest + tag + runs-on: ubuntu-latest + needs: build + permissions: + contents: read + packages: write + steps: + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 + with: + persist-credentials: false + - name: Read APP_VERSION + short sha + id: ver + run: | + v=$(grep -E '^APP_VERSION' src/constants.py | head -1 | sed -E 's/.*"([^"]+)".*/\1/') + [ -n "$v" ] || { echo "APP_VERSION not found"; exit 1; } + echo "version=$v" >> "$GITHUB_OUTPUT" + echo "short=${GITHUB_SHA::7}" >> "$GITHUB_OUTPUT" + - name: Download digests + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: /tmp/digests + pattern: digest-* + merge-multiple: true + - name: Set up Buildx + uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0 + - name: Log in to GHCR + uses: docker/login-action@650006c6eb7dba73a995cc03b0b2d7f5ca915bee # v4.2.0 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Compute tags + id: meta + uses: docker/metadata-action@80c7e94dd9b9319bd5eb7a0e0fe9291e23a2a2e9 # v6.1.0 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + type=raw,value=${{ steps.ver.outputs.version }},enable=${{ 
github.ref == 'refs/heads/main' }} + type=raw,value=dev,enable=${{ github.ref == 'refs/heads/dev' }} + type=raw,value=${{ steps.ver.outputs.version }}-dev.${{ steps.ver.outputs.short }},enable=${{ github.ref == 'refs/heads/dev' }} + - name: Create manifest list + push tags + working-directory: /tmp/digests + run: | + tags=$(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") + digests=$(printf "${REGISTRY}/${IMAGE_NAME}@sha256:%s " *) + # word-splitting is intended: $tags and $digests each expand to multiple args + # shellcheck disable=SC2086 + docker buildx imagetools create $tags $digests + env: + REGISTRY: ${{ env.REGISTRY }} + IMAGE_NAME: ${{ env.IMAGE_NAME }} + - name: Inspect + run: | + if [ "$GITHUB_REF" = "refs/heads/main" ]; then ref=latest; else ref=dev; fi + docker buildx imagetools inspect "${REGISTRY}/${IMAGE_NAME}:${ref}" + env: + REGISTRY: ${{ env.REGISTRY }} + IMAGE_NAME: ${{ env.IMAGE_NAME }} diff --git a/.github/workflows/issue-description-check.yml b/.github/workflows/issue-description-check.yml index 5dc3fdf82..3d0cf094e 100644 --- a/.github/workflows/issue-description-check.yml +++ b/.github/workflows/issue-description-check.yml @@ -14,10 +14,11 @@ jobs: # Skip bots (Dependabot, release-drafter, etc.) 
if: ${{ github.event.issue.user.type != 'Bot' }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: sparse-checkout: .github/scripts + persist-credentials: false - - uses: actions/github-script@v7 + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 with: script: return require('./.github/scripts/check-issue-description.js')({github, context, core}) diff --git a/.github/workflows/pr-description-check.yml b/.github/workflows/pr-description-check.yml index 9ac05b373..c8fbe4b0f 100644 --- a/.github/workflows/pr-description-check.yml +++ b/.github/workflows/pr-description-check.yml @@ -1,28 +1,109 @@ -name: ci / PR description check +name: ci / PR checks on: - pull_request_target: - types: [opened, edited, synchronize, reopened] + # pull_request_target runs in the base-repo context (has secrets) so the check + # works on fork PRs. Safe here: the checkout pins to the base branch (no fork + # code runs) and the scripts only read context.payload and call the GitHub API. + pull_request_target: # zizmor: ignore[dangerous-triggers] + types: [opened, edited, synchronize, reopened, ready_for_review] -# pull_request_target runs in the base-repo context (has secrets). -# The checkout below pins to the base branch so no fork code is executed. -# The script only reads context.payload and calls the GitHub API. -permissions: - issues: write - pull-requests: write +# Default-deny at the workflow level; each job opts into only the scopes it needs. +# Note: modifying a PR's labels/comments needs pull-requests:write even though the +# REST path is under /issues/{n}/...; issues:write alone returns 403 on PRs. +permissions: {} jobs: check-description: name: Check PR description runs-on: ubuntu-latest - # Skip bots — they open PRs programmatically and have their own process. 
+ permissions: + contents: read + pull-requests: write + issues: write + # Skip bots: they open PRs programmatically and have their own process. if: github.event.pull_request.user.type != 'Bot' steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 with: ref: ${{ github.base_ref }} sparse-checkout: .github/scripts + persist-credentials: false - - uses: actions/github-script@v7 + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 with: script: return require('./.github/scripts/check-pr-description.js')({github, context, core}) + + check-title: + name: Check PR title (Conventional Commits) + runs-on: ubuntu-latest + permissions: {} + # Skip bots: they open PRs programmatically and have their own process. + if: github.event.pull_request.user.type != 'Bot' + steps: + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const title = context.payload.pull_request.title || ""; + // Conventional Commits: type(optional-scope)(optional !): summary + const re = /^(feat|fix|docs|style|refactor|perf|test|build|ci|chore|revert)(\([\w .\/-]+\))?!?: .+/; + if (!re.test(title)) { + core.setFailed( + `PR title is not in Conventional Commits format:\n "${title}"\n\n` + + `Expected: type(scope): summary\n` + + `Example: fix(search): handle empty query\n` + + `Types: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert.` + ); + } else { + core.info(`PR title OK: ${title}`); + } + + check-mergeable: + name: Flag unmergeable PRs + runs-on: ubuntu-latest + permissions: + pull-requests: write + issues: write + # Skip bots: they open PRs programmatically and have their own process. 
+ if: github.event.pull_request.user.type != 'Bot' + steps: + - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0 + with: + script: | + const repo = { owner: context.repo.owner, repo: context.repo.repo }; + const number = context.payload.pull_request.number; + const READY = "ready for review"; + const CONFLICT = "merge conflict"; + + // Ensure the conflict label exists (red). Ignore if already present. + try { + await github.rest.issues.getLabel({ ...repo, name: CONFLICT }); + } catch { + await github.rest.issues.createLabel({ + ...repo, name: CONFLICT, color: "B60205", + description: "Conflicts with the base branch; needs a rebase before review.", + }).catch(() => {}); + } + + // mergeable is computed asynchronously and is often null right after + // an event, so poll a few times until GitHub has resolved it. + let pr = null; + for (let i = 0; i < 5; i++) { + const { data } = await github.rest.pulls.get({ ...repo, pull_number: number }); + if (data.mergeable !== null) { pr = data; break; } + await new Promise(r => setTimeout(r, 3000)); + } + if (!pr || pr.draft) return; + const labels = pr.labels.map(l => l.name); + + if (pr.mergeable === false) { + if (labels.includes(READY)) { + await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: READY }).catch(() => {}); + } + if (!labels.includes(CONFLICT)) { + await github.rest.issues.addLabels({ ...repo, issue_number: number, labels: [CONFLICT] }); + } + } else if (pr.mergeable === true) { + if (labels.includes(CONFLICT)) { + await github.rest.issues.removeLabel({ ...repo, issue_number: number, name: CONFLICT }).catch(() => {}); + } + } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2302c4198..174a4f2f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -94,6 +94,18 @@ Before submitting any change that affects what the app looks like — buttons, i If you are unsure whether a change is "visual," it is. Default to attaching a screenshot. 
+## Code conventions + +Don't hardcode values that the project already exposes through a constant or a helper. Hardcoded literals drift out of sync, break on non-default deployments, and reintroduce bugs we've already fixed. + +- **Filesystem paths:** never build writable paths from `Path(__file__)...` into the source tree, hardcode `/app/...`, or use a relative `"data/..."` string. Every persisted file and directory has a named constant in `src/constants.py` (for example `AUTH_FILE`, `USER_PREFS_FILE`, `SETTINGS_FILE`, `TTS_CACHE_DIR`, `CHROMA_DIR`). Import and use that named constant; do not re-derive the path locally with `os.path.join(DATA_DIR, "x.json")` or `DATA_DIR / "x.json"`. `DATA_DIR` is the single place that reads `ODYSSEUS_DATA_DIR`, so use it directly only for dynamic paths that have no fixed name (for example per-owner files). If a data file or directory has no constant yet, add one to `src/constants.py`. The source tree is read-only in Docker and `/app/...` does not exist on native runs; guard directory creation so an unwritable path degrades gracefully instead of crashing at import. +- **Internal API / loopback URLs:** don't hardcode `http://localhost:7000`. Use `internal_api_base()` from `src.constants` (it honors `ODYSSEUS_INTERNAL_BASE` / `APP_PORT`). +- **Ports, limits, model lists, and similar:** reuse the existing constant if one exists; if it doesn't and the value is used in more than one place, add a constant rather than copying the literal. + +If you need a value that has no constant or helper yet, add it to `src/constants.py` (the single source of truth for paths and config; `core/constants.py` only re-exports it for backward compatibility) and import it, rather than repeating a literal across files. + +**Commits:** use [Conventional Commits](https://www.conventionalcommits.org), `type(scope): summary` (e.g. `fix(search): ...`, `feat(notes): ...`, `docs(contributing): ...`). 
Common types: `fix`, `feat`, `refactor`, `docs`, `test`, `chore`, `ci`. Keep the subject short and imperative; put the "why" in the body when it isn't obvious. + ## Issue Reports For bugs, include: diff --git a/README.md b/README.md index 638089fd7..4fae1d76b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Odysseus +> **Branch note:** `dev` is the default branch and contains the latest development changes, but it may be unstable. For the more stable curated branch, use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main). + ``` ─────────────────────────────────────────────── ⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ Odysseus vers. 1.0 @@ -331,6 +333,12 @@ To expose Odysseus on a local network or Tailscale with HTTPS: | `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) | | `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). | +### Outlook / Office 365 email +Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook +and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox +passwords will fail. See [docs/email-outlook.md](docs/email-outlook.md) for the +current limitation and the planned integration direction. + ## Security Notes Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console. @@ -394,6 +402,16 @@ Key settings: | `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. | | `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. | | `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint | +| `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. 
| +| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). | +| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). | +| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). | +| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). | +| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). | +| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). | +| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). | + +All upload-limit vars are validated (must be a positive integer) and optional; an invalid value fails fast at startup. ### Built-in MCP servers (optional setup) diff --git a/app.py b/app.py index f02fb6c4a..97906bd46 100644 --- a/app.py +++ b/app.py @@ -51,10 +51,10 @@ from starlette.middleware.base import BaseHTTPMiddleware # Core imports from core.constants import ( BASE_DIR, STATIC_DIR, SESSIONS_FILE, - REQUEST_TIMEOUT, OPENAI_API_KEY, + REQUEST_TIMEOUT, OPENAI_API_KEY, AUTH_FILE, ) from core.database import SessionLocal, ApiToken -from core.middleware import SecurityHeadersMiddleware +from core.middleware import SecurityHeadersMiddleware, is_cors_preflight from core.auth import AuthManager from core.exceptions import ( SessionNotFoundError, InvalidFileUploadError, @@ -64,6 +64,7 @@ from core.exceptions import ( import bcrypt as _bcrypt from src.app_helpers import abs_join +from src.generated_images import GENERATED_IMAGE_HEADERS, resolve_generated_image_path from starlette.responses import RedirectResponse # ========= LOGGING ========= @@ -252,6 +253,15 @@ if AUTH_ENABLED: class AuthMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next): path = request.url.path + # A genuine CORS preflight (OPTIONS + Access-Control-Request-Method) + 
# carries no credentials by design and must reach CORSMiddleware to be + # answered. AuthMiddleware is the outermost middleware, so gating the + # preflight on auth 401s it before CORS can respond -- which blocks + # every cross-origin browser/WebView client before the real request + # is sent. Let real preflights through (only OPTIONS w/ the ACRM + # header; never a credentialed request). + if is_cors_preflight(request.method, request.headers): + return await call_next(request) if _is_auth_exempt(path): return await call_next(request) # In-process internal-tool token bypass. Used by the agent @@ -387,13 +397,7 @@ app.mount("/static", _RevalidatingStatic(directory="static"), name="static") @app.get("/api/generated-image/{filename}") async def serve_generated_image(filename: str, request: Request): """Serve generated images from the data directory.""" - from pathlib import Path - import re - if not re.match(r'^[a-f0-9]{8,64}\.(png|jpg|jpeg|webp|gif|mp4|mov|webm|mkv|m4v)$', filename): - raise HTTPException(status_code=400, detail="Invalid filename") - img_path = Path("data/generated_images") / filename - if not img_path.exists(): - raise HTTPException(status_code=404, detail="Image not found") + img_path = resolve_generated_image_path(filename) # SECURITY: filename is the only key, so anyone who knows / guesses a # 12-hex content hash could pull another user's image bytes. Require # auth and verify ownership via the gallery row (when one exists). 
@@ -429,7 +433,7 @@ async def serve_generated_image(filename: str, request: Request): return FileResponse( str(img_path), media_type=mime, - headers={"Cache-Control": "public, max-age=31536000, immutable"}, + headers=GENERATED_IMAGE_HEADERS, ) # ========= YOUTUBE INIT ========= @@ -594,6 +598,10 @@ app.include_router(setup_model_routes(model_discovery)) from routes.copilot_routes import setup_copilot_routes app.include_router(setup_copilot_routes()) +# ChatGPT Subscription device-flow login +from routes.chatgpt_subscription_routes import setup_chatgpt_subscription_routes +app.include_router(setup_chatgpt_subscription_routes()) + # TTS from routes.tts_routes import setup_tts_routes app.include_router(setup_tts_routes(tts_service)) @@ -789,6 +797,8 @@ async def serve_backgrounds(request: Request): @app.get("/login") async def serve_login(request: Request): + if not AUTH_ENABLED: + return RedirectResponse(url="/", status_code=302) return _serve_html_with_nonce(request, abs_join(BASE_DIR, "static/login.html")) @app.get("/api/version") @@ -948,7 +958,7 @@ async def _startup_event(): owners = set() try: import json as _json - auth_path = "data/auth.json" + auth_path = AUTH_FILE with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) owners.update(users.keys()) @@ -995,7 +1005,7 @@ async def _startup_event(): # does not make an existing library look empty after auth/account changes. 
try: import json as _json - auth_path = "data/auth.json" + auth_path = AUTH_FILE with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) primary_owner = None diff --git a/companion/pairing.py b/companion/pairing.py index 48197302b..c4ea62345 100644 --- a/companion/pairing.py +++ b/companion/pairing.py @@ -14,6 +14,8 @@ import uuid import bcrypt +from src.constants import AUTH_FILE + PAIRING_VERSION = 1 COMPANION_SCOPE = "chat" @@ -61,7 +63,7 @@ def lan_ip_candidates() -> list[str]: def find_admin_user() -> str | None: """Resolve an admin username from data/auth.json (schema uses is_admin), falling back to the first user.""" - auth_path = os.path.join("data", "auth.json") + auth_path = AUTH_FILE try: with open(auth_path, "r", encoding="utf-8") as f: data = json.load(f) diff --git a/core/auth.py b/core/auth.py index d4f5d36f3..5db2fed4c 100644 --- a/core/auth.py +++ b/core/auth.py @@ -30,14 +30,24 @@ DEFAULT_PRIVILEGES = { "can_manage_memory": True, "max_messages_per_day": 0, "allowed_models": [], + "allowed_models_restricted": False, + # Explicit "block every model" sentinel. An empty `allowed_models` list is + # ambiguous — it's also what gets sent when the admin clicks "[All]" — so + # we need a dedicated flag to express "this user may use no models at all" + # distinctly from "this user has no restriction". + "block_all_models": False, } # Admins get everything ADMIN_PRIVILEGES = {k: (True if isinstance(v, bool) else (0 if isinstance(v, int) else [])) for k, v in DEFAULT_PRIVILEGES.items()} +ADMIN_PRIVILEGES["allowed_models_restricted"] = False +# Admins must never be blocked from using models — the generic dict +# comprehension above flips every boolean default to True, which would be +# backwards for this sentinel. 
+ADMIN_PRIVILEGES["block_all_models"] = False -DEFAULT_AUTH_PATH = os.path.join( - Path(__file__).parent.parent, "data", "auth.json" -) +from src.constants import AUTH_FILE +DEFAULT_AUTH_PATH = AUTH_FILE TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days # Usernames the auth + middleware layer reserve as internal "synthetic owner" @@ -76,6 +86,10 @@ class AuthManager: # Guards mutations of self._sessions and the on-disk sessions.json. # Validate/create/revoke run concurrently from the FastAPI threadpool. self._sessions_lock = threading.RLock() + # Guards all mutations of self._config and the on-disk auth.json so + # concurrent create/delete/rename/privilege operations don't interleave + # and corrupt the user database. + self._config_lock = threading.Lock() # Guards the first-run setup check-and-write so concurrent requests # cannot both observe is_configured==False and both create admin accounts. self._setup_lock = threading.Lock() @@ -172,8 +186,9 @@ class AuthManager: @signup_enabled.setter def signup_enabled(self, value: bool): - self._config["signup_enabled"] = value - self._save() + with self._config_lock: + self._config["signup_enabled"] = value + self._save() @property def is_configured(self) -> bool: @@ -198,17 +213,18 @@ class AuthManager: if username in RESERVED_USERNAMES: logger.warning("Refused to create reserved username '%s'", username) return False - if username in self.users: - return False - if "users" not in self._config: - self._config["users"] = {} - self._config["users"][username] = { - "password_hash": _hash_password(password), - "created": time.time(), - "is_admin": is_admin, - "privileges": dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES), - } - self._save() + with self._config_lock: + if username in self.users: + return False + if "users" not in self._config: + self._config["users"] = {} + self._config["users"][username] = { + "password_hash": _hash_password(password), + "created": time.time(), + "is_admin": is_admin, + "privileges": 
dict(ADMIN_PRIVILEGES if is_admin else DEFAULT_PRIVILEGES), + } + self._save() logger.info(f"Created user '{username}' (admin={is_admin})") return True @@ -221,14 +237,15 @@ class AuthManager: their cookie expired naturally (default ~30 days). """ username = username.strip().lower() - if username not in self.users: - return False - if username == requesting_user: - return False - if not self.users.get(requesting_user, {}).get("is_admin"): - return False - del self._config["users"][username] - self._save() + with self._config_lock: + if username not in self.users: + return False + if username == requesting_user: + return False + if not self.users.get(requesting_user, {}).get("is_admin"): + return False + del self._config["users"][username] + self._save() # Purge all sessions belonging to this user. validate_token doesn't # cross-check `self.users`, so without this step a deleted user's # cookie keeps authenticating. @@ -266,14 +283,15 @@ class AuthManager: if new_username in RESERVED_USERNAMES: logger.warning("Refused to rename '%s' into reserved username '%s'", old_username, new_username) return False - if old_username not in self.users: - return False - if new_username in self.users: - return False - if not self.users.get(requesting_user, {}).get("is_admin"): - return False - self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username) - self._save() + with self._config_lock: + if old_username not in self.users: + return False + if new_username in self.users: + return False + if not self.users.get(requesting_user, {}).get("is_admin"): + return False + self._config.setdefault("users", {})[new_username] = self._config["users"].pop(old_username) + self._save() renamed_sessions = 0 with self._sessions_lock: @@ -311,17 +329,18 @@ class AuthManager: def set_privileges(self, username: str, privileges: Dict[str, Any]) -> bool: """Update privileges for a user. 
Can't modify admin privileges.""" username = username.strip().lower() - if username not in self.users: - return False - if self.users[username].get("is_admin"): - return False # admins always have full access - # Only allow known privilege keys - current = self.get_privileges(username) - for k, v in privileges.items(): - if k in DEFAULT_PRIVILEGES: - current[k] = v - self._config["users"][username]["privileges"] = current - self._save() + with self._config_lock: + if username not in self.users: + return False + if self.users[username].get("is_admin"): + return False # admins always have full access + # Only allow known privilege keys + current = self.get_privileges(username) + for k, v in privileges.items(): + if k in DEFAULT_PRIVILEGES: + current[k] = v + self._config["users"][username]["privileges"] = current + self._save() logger.info(f"Updated privileges for '{username}': {current}") return True @@ -331,8 +350,9 @@ class AuthManager: return False if not _verify_password(current_password, self.users[username]["password_hash"]): return False - self._config["users"][username]["password_hash"] = _hash_password(new_password) - self._save() + with self._config_lock: + self._config["users"][username]["password_hash"] = _hash_password(new_password) + self._save() return True # ------------------------------------------------------------------ @@ -350,8 +370,9 @@ class AuthManager: if username not in self.users: return None secret = pyotp.random_base32() - self._config["users"][username]["totp_secret_pending"] = secret - self._save() + with self._config_lock: + self._config["users"][username]["totp_secret_pending"] = secret + self._save() return secret def totp_get_provisioning_uri(self, username: str, secret: str) -> str: @@ -370,13 +391,14 @@ class AuthManager: if not totp.verify(code, valid_window=1): return False # Enable 2FA - self._config["users"][username]["totp_secret"] = secret - self._config["users"][username]["totp_enabled"] = True - 
self._config["users"][username].pop("totp_secret_pending", None) - # Generate backup codes - backup = [secrets.token_hex(4) for _ in range(8)] - self._config["users"][username]["totp_backup_codes"] = backup - self._save() + with self._config_lock: + self._config["users"][username]["totp_secret"] = secret + self._config["users"][username]["totp_enabled"] = True + self._config["users"][username].pop("totp_secret_pending", None) + # Generate backup codes + backup = [secrets.token_hex(4) for _ in range(8)] + self._config["users"][username]["totp_backup_codes"] = backup + self._save() logger.info(f"2FA enabled for '{username}'") return True @@ -395,9 +417,10 @@ class AuthManager: # Check backup codes first backup = user.get("totp_backup_codes", []) if code in backup: - backup.remove(code) - self._config["users"][username]["totp_backup_codes"] = backup - self._save() + with self._config_lock: + backup.remove(code) + self._config["users"][username]["totp_backup_codes"] = backup + self._save() logger.info(f"Backup code used for '{username}' ({len(backup)} remaining)") return True totp = pyotp.TOTP(secret) @@ -408,11 +431,12 @@ class AuthManager: username = username.strip().lower() if not self.verify_password(username, password): return False - self._config["users"][username].pop("totp_secret", None) - self._config["users"][username].pop("totp_secret_pending", None) - self._config["users"][username].pop("totp_backup_codes", None) - self._config["users"][username]["totp_enabled"] = False - self._save() + with self._config_lock: + self._config["users"][username].pop("totp_secret", None) + self._config["users"][username].pop("totp_secret_pending", None) + self._config["users"][username].pop("totp_backup_codes", None) + self._config["users"][username]["totp_enabled"] = False + self._save() logger.info(f"2FA disabled for '{username}'") return True @@ -431,6 +455,12 @@ class AuthManager: username = username.strip().lower() if not self.verify_password(username, password): return 
None + return self.create_session_trusted(username) + + def create_session_trusted(self, username: str) -> str: + """Issue a session token for an already-verified user. + Call only after verify_password (and TOTP if enabled) have passed.""" + username = username.strip().lower() token = secrets.token_hex(32) with self._sessions_lock: self._sessions[token] = { diff --git a/core/constants.py b/core/constants.py index 5dcf9e91e..d71bb0aed 100644 --- a/core/constants.py +++ b/core/constants.py @@ -1,40 +1,12 @@ -# src/constants.py -"""Application-wide constants and configuration values.""" -import os +# core/constants.py +"""Backward-compatible shim — the single source of truth is src/constants.py. -APP_VERSION = "0.9.1" - -# Base paths -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/" -STATIC_DIR = os.path.join(BASE_DIR, "static") -DATA_DIR = os.path.join(BASE_DIR, "data") - -# Data file paths -SESSIONS_FILE = os.path.join(DATA_DIR, "sessions.json") -MEMORY_FILE = os.path.join(DATA_DIR, "memory.json") -MEMORY_DOC = os.path.join(DATA_DIR, "memory_doc.md") -PERSONAL_DIR = os.path.join(DATA_DIR, "personal_docs") -RUNBOOK_DIR = os.path.join(PERSONAL_DIR, "runbook") -UPLOAD_DIR = os.path.join(DATA_DIR, "uploads") -FEATURES_FILE = os.path.join(DATA_DIR, "features.json") -SETTINGS_FILE = os.path.join(DATA_DIR, "settings.json") - -# API Configuration -MAX_CONTEXT_MESSAGES = 90 -REQUEST_TIMEOUT = 20 -OPENAI_COMPAT_PATH = "/v1/chat/completions" - -# Environment variables with defaults -DEFAULT_HOST = os.getenv("LLM_HOST", "localhost") -LLM_HOSTS = [h.strip() for h in os.getenv("LLM_HOSTS", "").split(",") if h.strip()] -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") -SEARXNG_INSTANCE = os.getenv('SEARXNG_INSTANCE', 'http://localhost:8080') - - -# Cleanup configuration -CLEANUP_ENABLED = os.getenv("CLEANUP_ENABLED", "True").lower() == "true" -CLEANUP_INTERVAL_HOURS = int(os.getenv("CLEANUP_INTERVAL_HOURS", "24")) - -# Default parameters 
-DEFAULT_TEMPERATURE = 1.0 -DEFAULT_MAX_TOKENS = 0 +Historically there were two copies of this module (this one lagged behind at +APP_VERSION 0.9.1 and was missing the consolidated tool-output constants). To +kill the drift, this now simply re-exports everything from src.constants so +there is exactly one place that defines paths and reads ODYSSEUS_DATA_DIR. +internal_api_base() also lives in src.constants now and is re-exported here so +existing `from core.constants import internal_api_base` callers keep working. +""" +from src.constants import * # noqa: F401,F403 +from src.constants import internal_api_base # noqa: F401 (explicit: functions aren't covered by some linters' * checks) diff --git a/core/database.py b/core/database.py index 8a88b2854..ee365c30c 100644 --- a/core/database.py +++ b/core/database.py @@ -29,8 +29,9 @@ class TimestampMixin: def updated_at(cls): return Column(DateTime, default=utcnow_naive, onupdate=utcnow_naive, nullable=False) -# Get database URL from environment, default to SQLite -DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./data/app.db") +# Get database URL from environment, default to SQLite in DATA_DIR +from src.constants import DATA_DIR, AUTH_FILE, MEMORY_FILE, USER_PREFS_FILE, SETTINGS_FILE +DATABASE_URL = os.getenv("DATABASE_URL", f"sqlite:///{DATA_DIR}/app.db") # Create engine engine = create_engine( @@ -360,6 +361,24 @@ class ModelEndpoint(TimestampMixin, Base): # is the historical default. When non-null, the model picker only shows # the endpoint to that user (admins always see everything). owner = Column(String, nullable=True, index=True) + # Optional OAuth/session-backed credential row. Used by subscription-backed + # providers that need refresh tokens instead of a static API key. 
+ provider_auth_id = Column(String, nullable=True, index=True) + + +class ProviderAuthSession(TimestampMixin, Base): + """Encrypted OAuth/session credentials for refresh-aware model providers.""" + __tablename__ = "provider_auth_sessions" + + id = Column(String, primary_key=True, index=True) + provider = Column(String, nullable=False, index=True) + owner = Column(String, nullable=True, index=True) + label = Column(String, nullable=True) + base_url = Column(String, nullable=False) + access_token = Column(EncryptedText, nullable=True) + refresh_token = Column(EncryptedText, nullable=True) + last_refresh = Column(DateTime, nullable=True) + auth_mode = Column(String, nullable=True) class McpServer(TimestampMixin, Base): """Admin-configured MCP (Model Context Protocol) tool servers.""" @@ -800,6 +819,26 @@ def _migrate_add_model_endpoint_owner_column(): logging.getLogger(__name__).warning(f"model_endpoints.owner migration failed: {e}") +def _migrate_add_provider_auth_id_column(): + """Add provider_auth_id column to model_endpoints if it doesn't exist.""" + import sqlite3 + db_path = DATABASE_URL.replace("sqlite:///", "") + if not os.path.exists(db_path): + return + try: + conn = sqlite3.connect(db_path) + cursor = conn.execute("PRAGMA table_info(model_endpoints)") + columns = [row[1] for row in cursor.fetchall()] + if columns and "provider_auth_id" not in columns: + conn.execute("ALTER TABLE model_endpoints ADD COLUMN provider_auth_id VARCHAR") + conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_provider_auth_id ON model_endpoints(provider_auth_id)") + conn.commit() + logging.getLogger(__name__).info("Migrated: added 'provider_auth_id' column + index to model_endpoints") + conn.close() + except Exception as e: + logging.getLogger(__name__).warning(f"model_endpoints.provider_auth_id migration failed: {e}") + + def _migrate_add_model_type_column(): """Add model_type column to model_endpoints if it doesn't exist.""" import sqlite3 @@ -1065,7 +1104,7 @@ def 
_migrate_assign_legacy_owner(): # fell through to "first user" every time. auth_path = os.path.join(os.path.dirname(DATABASE_URL.replace("sqlite:///", "")), "auth.json") if not os.path.isabs(auth_path): - auth_path = os.path.join("data", "auth.json") + auth_path = AUTH_FILE admin_user = None try: with open(auth_path, "r", encoding="utf-8") as f: @@ -1118,7 +1157,7 @@ def _migrate_assign_legacy_owner(): logger.warning(f"Legacy owner migration failed: {e}") # Also migrate memory.json - mem_path = os.path.join("data", "memory.json") + mem_path = MEMORY_FILE try: if os.path.exists(mem_path): with open(mem_path, "r", encoding="utf-8") as f: @@ -1136,7 +1175,7 @@ def _migrate_assign_legacy_owner(): logger.warning(f"memory.json legacy migration failed: {e}") # Also migrate user_prefs.json to per-user format - prefs_path = os.path.join("data", "user_prefs.json") + prefs_path = USER_PREFS_FILE try: if os.path.exists(prefs_path): with open(prefs_path, "r", encoding="utf-8") as f: @@ -1458,7 +1497,11 @@ class CalendarCal(TimestampMixin, Base): owner = Column(String, nullable=True, index=True) name = Column(String, nullable=False) color = Column(String, default="#5b8abf") - source = Column(String, default="local") # "local" or "timetree" + source = Column(String, default="local") # "local" or "caldav" + # UUID of the CalDAV account in user prefs that owns this calendar. + # NULL for local calendars and for CalDAV calendars created before + # multi-account support was added (treated as "use any configured account"). 
+ account_id = Column(String, nullable=True, index=True) events = relationship("CalendarEvent", back_populates="calendar", cascade="all, delete-orphan") @@ -1526,7 +1569,7 @@ def _migrate_seed_email_account(): import json as _json import uuid as _uuid from pathlib import Path - settings_file = Path("data/settings.json") + settings_file = Path(SETTINGS_FILE) if not settings_file.exists(): return try: @@ -1594,6 +1637,7 @@ def init_db(): _migrate_add_model_type_column() _migrate_add_model_endpoint_refresh_columns() _migrate_add_model_endpoint_owner_column() + _migrate_add_provider_auth_id_column() _migrate_add_supports_tools_column() _migrate_add_task_run_model_column() _migrate_add_owner_column() @@ -1622,9 +1666,105 @@ def init_db(): _migrate_add_calendar_metadata() _migrate_add_calendar_is_utc() _migrate_add_calendar_origin() + _migrate_add_calendar_account_id() + _migrate_chat_messages_fts() _migrate_encrypt_email_passwords() _migrate_encrypt_signatures() _migrate_encrypt_endpoint_keys() + _migrate_backfill_task_folders() + + +def _migrate_backfill_task_folders(): + """Backfill folder='Tasks' on pre-existing task/research sessions. + + Sessions created by the task scheduler (LLM tasks, action tasks, research + runs) now set folder='Tasks' at creation time. This migration tags any + older sessions that predate that assignment. Idempotent — only touches + rows where folder is NULL or empty and the title matches known prefixes. 
+ """ + try: + with engine.connect() as conn: + cols = [r[1] for r in conn.execute(text("PRAGMA table_info(sessions)"))] + if "folder" not in cols: + return + res = conn.execute(text( + "UPDATE sessions SET folder = 'Tasks' " + "WHERE (folder IS NULL OR folder = '') " + "AND (name LIKE '[Task] %' OR name LIKE '[Research] %')" + )) + conn.commit() + if res.rowcount: + logging.getLogger(__name__).info( + f"Backfilled folder='Tasks' on {res.rowcount} task/research sessions") + except Exception as e: + logging.getLogger(__name__).warning(f"task folder backfill: {e}") + + +def _migrate_chat_messages_fts(): + """Create and backfill the session transcript FTS index for SQLite.""" + if not DATABASE_URL.startswith("sqlite"): + return + + db_path = DATABASE_URL.replace("sqlite:///", "") + if db_path == ":memory:": + return + conn = None + try: + conn = sqlite3.connect(db_path) + try: + conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS temp._odysseus_fts5_probe USING fts5(content)") + conn.execute("DROP TABLE IF EXISTS temp._odysseus_fts5_probe") + except Exception as e: + logging.getLogger(__name__).warning(f"chat_messages FTS migration skipped; FTS5 unavailable: {e}") + return + + conn.executescript( + """ + CREATE VIRTUAL TABLE IF NOT EXISTS chat_messages_fts USING fts5( + content, + message_id UNINDEXED, + session_id UNINDEXED, + role UNINDEXED + ); + + CREATE TRIGGER IF NOT EXISTS chat_messages_fts_ai + AFTER INSERT ON chat_messages BEGIN + INSERT INTO chat_messages_fts(content, message_id, session_id, role) + VALUES (COALESCE(new.content, ''), new.id, new.session_id, new.role); + END; + + CREATE TRIGGER IF NOT EXISTS chat_messages_fts_ad + AFTER DELETE ON chat_messages BEGIN + DELETE FROM chat_messages_fts WHERE message_id = old.id; + END; + + CREATE TRIGGER IF NOT EXISTS chat_messages_fts_au + AFTER UPDATE ON chat_messages BEGIN + DELETE FROM chat_messages_fts WHERE message_id = old.id; + INSERT INTO chat_messages_fts(content, message_id, session_id, role) + VALUES 
(COALESCE(new.content, ''), new.id, new.session_id, new.role); + END; + """ + ) + conn.execute( + """ + INSERT INTO chat_messages_fts(content, message_id, session_id, role) + SELECT COALESCE(cm.content, ''), cm.id, cm.session_id, cm.role + FROM chat_messages cm + WHERE NOT EXISTS ( + SELECT 1 FROM chat_messages_fts fts + WHERE fts.message_id = cm.id + ) + """ + ) + conn.commit() + except Exception as e: + logging.getLogger(__name__).warning(f"chat_messages FTS migration failed: {e}") + finally: + try: + conn.close() + except Exception: + pass def _migrate_add_email_smtp_security(): @@ -1786,6 +1926,27 @@ def _migrate_add_calendar_origin(): logging.getLogger(__name__).warning(f"calendar_events.origin migration failed: {e}") +def _migrate_add_calendar_account_id(): + """Add `account_id` to calendars so each CalDAV-backed calendar knows which + credential set (from caldav_accounts in user prefs) owns it. Idempotent.""" + import sqlite3 + db_path = DATABASE_URL.replace("sqlite:///", "") + if not os.path.exists(db_path): + return + try: + conn = sqlite3.connect(db_path) + cursor = conn.execute("PRAGMA table_info(calendars)") + columns = [row[1] for row in cursor.fetchall()] + if columns and "account_id" not in columns: + conn.execute("ALTER TABLE calendars ADD COLUMN account_id TEXT") + conn.execute("CREATE INDEX IF NOT EXISTS ix_calendars_account_id ON calendars(account_id)") + conn.commit() + logging.getLogger(__name__).info("Migrated: added 'account_id' column to calendars") + conn.close() + except Exception as e: + logging.getLogger(__name__).warning(f"calendars.account_id migration failed: {e}") + + def _migrate_add_calendar_metadata(): """Add importance/event_type/last_pinged columns to calendar_events table.""" import sqlite3 diff --git a/core/middleware.py b/core/middleware.py index 82d1d0324..550ee3bd7 100644 --- a/core/middleware.py +++ b/core/middleware.py @@ -17,6 +17,15 @@ INTERNAL_TOOL_TOKEN = os.environ.get("ODYSSEUS_INTERNAL_TOKEN") or secrets.token 
INTERNAL_TOOL_HEADER = "X-Odysseus-Internal-Token" +def is_cors_preflight(method: str, headers) -> bool: + """True for a genuine CORS preflight: an OPTIONS request carrying the + Access-Control-Request-Method header. Such requests are credential-less by + design and must reach CORSMiddleware to be answered -- gating them on auth + 401s the preflight and breaks every cross-origin browser/WebView client. + Pure so it can be unit-tested without standing up the app.""" + return method == "OPTIONS" and "access-control-request-method" in headers + + def require_admin(request: Request): """Raise 403 if the current user isn't an admin. Allows access when auth is explicitly disabled, or when the request carries @@ -58,11 +67,22 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # Tool render endpoints are served inside iframes — allow framing by self is_tool_render = path.startswith("/api/tools/") and path.endswith("/render") + # PDF previews are embedded by the in-app document library. Keep the + # exception route-scoped so normal app pages remain unframeable. + is_document_pdf_preview = path.startswith("/api/document/") and path.endswith("/render-pdf") # Visual report pages are self-contained HTML — need inline scripts + external images is_report = path.startswith("/api/research/report/") response.headers["X-Content-Type-Options"] = "nosniff" response.headers["Referrer-Policy"] = "no-referrer" + response.headers["Permissions-Policy"] = "camera=(), microphone=(self), geolocation=()" + + is_https = ( + request.url.scheme == "https" + or request.headers.get("X-Forwarded-Proto") == "https" + ) + if is_https: + response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains" if is_report: response.headers["Content-Security-Policy"] = ( @@ -79,6 +99,12 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware): # sandbox="allow-scripts" attribute provides isolation. # Don't overwrite the route's own restrictive CSP either. 
pass + elif is_document_pdf_preview: + response.headers["X-Frame-Options"] = "SAMEORIGIN" + response.headers["Content-Security-Policy"] = ( + "default-src 'none'; " + "frame-ancestors 'self'" + ) else: response.headers["X-Frame-Options"] = "DENY" # NOTE: `style-src 'unsafe-inline'` is intentionally retained. diff --git a/core/platform_compat.py b/core/platform_compat.py index e2339ad33..3eda4a107 100644 --- a/core/platform_compat.py +++ b/core/platform_compat.py @@ -18,10 +18,22 @@ import ntpath import shutil import subprocess from pathlib import Path +import sys from typing import List, Optional +import platform IS_WINDOWS = os.name == "nt" IS_POSIX = not IS_WINDOWS +# Allows APFEL support and ARM-native binary recommendations on Apple Silicon Macs. +IS_APPLE_SILICON = ( + IS_POSIX + and platform.system() == "Darwin" + and platform.machine().lower() + in { + "arm64", + "aarch64", + } +) # ── File permissions ──────────────────────────────────────────────────────── @@ -53,9 +65,8 @@ def detached_popen_kwargs() -> dict: and is detached from any console. """ if IS_WINDOWS: - flags = ( - getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) - | getattr(subprocess, "DETACHED_PROCESS", 0x00000008) + flags = getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) | getattr( + subprocess, "DETACHED_PROCESS", 0x00000008 ) return {"creationflags": flags} return {"start_new_session": True} @@ -150,6 +161,29 @@ _WINDOWS_BASH_RELATIVE_PATHS = ( ("usr", "bin", "bash.exe"), ) +# Paths to add to the remote SSH probe command to find tools like nvidia-smi that may not be on PATH. +_SSH_PATH_MEMBERS = ( + "/usr/bin", + "/usr/local/bin", + "/usr/local/cuda/bin", + "/usr/lib/wsl/lib" +) +# Fallback locations for nvidia-smi on WSL and other Linux distros where it may not be on PATH. 
+NVIDIA_PATH_CANDIDATES = ( + "/usr/bin/nvidia-smi", + "/usr/local/bin/nvidia-smi", + "/usr/local/cuda/bin/nvidia-smi", + "/usr/lib/wsl/lib/nvidia-smi", +) + + +def _ssh_path_override() -> str: + """Build the PATH export snippet used for remote SSH shell probes.""" + return f"export PATH=\"$PATH:{':'.join(_SSH_PATH_MEMBERS)}\"; " + + +SSH_PATH_OVERRIDE = _ssh_path_override() + def _windows_bash_fallbacks() -> List[str]: roots: List[str] = [] @@ -180,6 +214,21 @@ def _is_windows_bash_stub(path: str) -> bool: ) +def git_bash_path(path: str | Path) -> str: + """Convert a path to POSIX style suitable for Git Bash on Windows. + + Transforms drive letters (e.g., 'C:\\path') to POSIX '/c/path', + and uses forward slashes. + """ + p = Path(path) + p_str = p.as_posix() + if IS_WINDOWS and len(p_str) >= 2 and p_str[1] == ":": + drive = p_str[0].lower() + return f"/{drive}{p_str[2:]}" + return p_str + + + def find_bash() -> Optional[str]: """Locate a real ``bash`` interpreter, or None. @@ -242,3 +291,156 @@ def run_script_argv(script_path) -> List[str]: comspec = os.environ.get("ComSpec", "cmd.exe") return [comspec, "/c", str(script_path)] return ["sh", str(script_path)] + + +def is_wsl() -> bool: + """True if running inside Windows Subsystem for Linux (WSL).""" + import sys + if sys.platform.startswith("linux") or os.name == "posix": + try: + with open("/proc/version", "r") as f: + if "microsoft" in f.read().lower(): + return True + except Exception: + pass + return False + + +def translate_path(path_str: str) -> str: + """Translate a path (possibly a Windows path) to the current OS format. + + Particularly handles Windows paths (e.g. C:\\foo or C:/foo) when running + under WSL, translating them to /mnt/c/foo. + Also handles standard path normalization to avoid string breakages. 
+ """ + if not path_str: + return path_str + + if is_wsl(): + path_str = path_str.replace("\\", "/") + import re + m = re.match(r"^([a-zA-Z]):(.*)", path_str) + if m: + drive = m.group(1).lower() + rest = m.group(2) + if not rest.startswith("/"): + rest = "/" + rest + return f"/mnt/{drive}{rest}" + + try: + return str(Path(path_str).resolve()) + except Exception: + return path_str + + +def get_wsl_windows_user_profile() -> Optional[str]: + """Retrieve the Windows host User Profile path from inside WSL.""" + if not is_wsl(): + return None + try: + r = run_wsl_windows_powershell("Write-Output $env:USERPROFILE", timeout=5) + if r.returncode == 0 and r.stdout.strip(): + return translate_path(r.stdout.strip()) + except Exception: + pass + + try: + users_dir = "/mnt/c/Users" + if os.path.isdir(users_dir): + for entry in os.listdir(users_dir): + if entry not in ("All Users", "Default", "Default User", "desktop.ini", "Public"): + path = os.path.join(users_dir, entry) + if os.path.isdir(path): + return path + except Exception: + pass + return None + + +def _ssh_exec_argv( + remote: str, + ssh_port: str | None, + *, + remote_cmd: str | None = None, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, +) -> list[str]: + """Build a consistent ssh argv for remote command execution.""" + argv = ["ssh"] + if connect_timeout is not None: + argv.extend(["-o", f"ConnectTimeout={int(connect_timeout)}"]) + if strict_host_key_checking is not None: + argv.extend( + [ + "-o", + "StrictHostKeyChecking=yes" + if strict_host_key_checking + else "StrictHostKeyChecking=no", + ] + ) + if ssh_port and ssh_port != "22": + argv.extend(["-p", str(ssh_port)]) + argv.append(remote) + if remote_cmd is not None: + argv.append(remote_cmd) + return argv + + +def run_ssh_command( + remote: str, + ssh_port: str | None, + remote_cmd: str, + *, + timeout: float, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, + text: bool = True, +) -> 
subprocess.CompletedProcess: + """Run an ssh command with centralized timeout and stderr/stdout capture.""" + return subprocess.run( + _ssh_exec_argv( + remote, + ssh_port, + remote_cmd=remote_cmd, + connect_timeout=connect_timeout, + strict_host_key_checking=strict_host_key_checking, + ), + timeout=timeout, + capture_output=True, + text=text, + ) + + +def _windows_powershell_argv( + command: str, + *, + no_profile: bool = True, + non_interactive: bool = True, +) -> List[str]: + argv: List[str] = ["powershell.exe"] + if no_profile: + argv.append("-NoProfile") + if non_interactive: + argv.append("-NonInteractive") + argv.extend(["-Command", command]) + return argv + + +def run_wsl_windows_powershell( + command: str, + *, + timeout: float = 5, +) -> subprocess.CompletedProcess[str]: + """Run a PowerShell command on the Windows host from WSL. + + Raises ``RuntimeError`` when called outside WSL. + """ + + if not is_wsl(): + raise RuntimeError("run_wsl_windows_powershell is only supported in WSL") + return subprocess.run( + _windows_powershell_argv(command), + capture_output=True, + text=True, + timeout=timeout, + ) diff --git a/core/session_manager.py b/core/session_manager.py index 54919295a..ecc23e088 100644 --- a/core/session_manager.py +++ b/core/session_manager.py @@ -14,7 +14,7 @@ import logging from datetime import datetime, timezone, timedelta from typing import Dict, Optional -from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal +from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal, utcnow_naive from .models import Session, ChatMessage logger = logging.getLogger(__name__) @@ -619,7 +619,7 @@ class SessionManager: try: all_sessions = db.query(DbSession).all() - cutoff_date = datetime.now(timezone.utc) - timedelta(days=auto_archive_days) + cutoff_date = utcnow_naive() - timedelta(days=auto_archive_days) for db_session in all_sessions: 
stats['total_checked'] += 1 diff --git a/docker-compose.gpu-amd.yml b/docker-compose.gpu-amd.yml index 47e0c8550..b95dde1bf 100644 --- a/docker-compose.gpu-amd.yml +++ b/docker-compose.gpu-amd.yml @@ -52,12 +52,14 @@ services: - SECURE_COOKIES=${SECURE_COOKIES:-false} - EMBEDDING_URL=${EMBEDDING_URL:-} - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} diff --git a/docker-compose.gpu-nvidia.yml b/docker-compose.gpu-nvidia.yml index 36ca10efe..fa50896ba 100644 --- a/docker-compose.gpu-nvidia.yml +++ b/docker-compose.gpu-nvidia.yml @@ -51,12 +51,14 @@ services: - SECURE_COOKIES=${SECURE_COOKIES:-false} - EMBEDDING_URL=${EMBEDDING_URL:-} - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} diff --git a/docker-compose.yml b/docker-compose.yml index f3a8dcc49..9841b1dca 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -40,12 +40,14 @@ services: - SECURE_COOKIES=${SECURE_COOKIES:-false} - EMBEDDING_URL=${EMBEDDING_URL:-} - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - EMBEDDING_API_KEY=${EMBEDDING_API_KEY:-} - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760} - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} diff --git a/docs/email-outlook.md b/docs/email-outlook.md new file mode 100644 index 000000000..1f8b97d5d --- /dev/null +++ b/docs/email-outlook.md @@ -0,0 +1,17 @@ +# Outlook / Office 365 email accounts + +Odysseus email accounts currently use IMAP and SMTP with username/password +authentication. That works for providers that still allow app passwords or +mailbox passwords for IMAP/SMTP. + +Microsoft disables basic authentication for Outlook and Microsoft 365 in most +modern accounts and tenants. If you try to add an Outlook account with a normal +password, Microsoft may return errors such as: + +- `IMAP: AUTHENTICATE failed` +- `SMTP: 535 5.7.139 Authentication unsuccessful, basic authentication is disabled` + +This is expected. Odysseus does not support Microsoft OAuth or Graph Mail yet, +so Outlook / Office 365 accounts cannot currently be added through the password +form. Use another email provider with app-password support, or track the future +Microsoft Graph OAuth integration. 
diff --git a/mcp_servers/_common.py b/mcp_servers/_common.py deleted file mode 100644 index 341bfe64e..000000000 --- a/mcp_servers/_common.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -_common.py - -Shared constants and helpers for built-in MCP servers. -""" - -MAX_OUTPUT_CHARS = 10_000 -MAX_READ_CHARS = 20_000 -SHELL_TIMEOUT = 60 -PYTHON_TIMEOUT = 30 -SEARCH_TIMEOUT = 30 - - -def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: - """Truncate text to *limit* characters with a suffix note.""" - if not isinstance(text, str): - # Tool output is occasionally None or a non-string; len(None) would - # raise. Coerce so this shared helper never crashes a tool response. - text = "" if text is None else str(text) - if len(text) > limit: - return text[:limit] + f"\n... (truncated, {len(text)} chars total)" - return text diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index 9382624dd..d1c2ac07e 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -31,13 +31,19 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) server = Server("email") EMAIL_SOCKET_TIMEOUT = float(os.environ.get("EMAIL_SOCKET_TIMEOUT", "20")) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" +from src.constants import DATA_DIR as _DATA_DIR, APP_DB, EMAIL_CACHE_DB, SETTINGS_FILE as _SETTINGS_FILE, MAIL_ATTACHMENTS_DIR +DATA_DIR = Path(_DATA_DIR) def _b(value) -> bytes: return str(value).encode() +def _q(name: str) -> str: + """Quote an IMAP mailbox name for commands that take mailbox args.""" + return '"' + (name or "").replace("\\", "\\\\").replace('"', '\\"') + '"' + + def _uid_fetch_rows(data) -> list: return [d for d in (data or []) if isinstance(d, bytes) and b"UID " in d] @@ -58,7 +64,7 @@ def _clean_header_value(value) -> str: def _db_path() -> Path: - return DATA_DIR / "app.db" + return Path(APP_DB) def _list_accounts_raw() -> list: @@ -157,7 +163,7 @@ def _load_config(account: str | None = None) -> dict: "trash_folder": 
os.environ.get("TRASH_FOLDER", "Trash"), "cache_db": os.environ.get( "EMAIL_CACHE_DB", - str(DATA_DIR / "email_cache.db"), + EMAIL_CACHE_DB, ), "account_id": None, "account_name": None, @@ -199,7 +205,7 @@ def _load_config(account: str | None = None) -> dict: else: # Legacy fallback: settings.json flat keys try: - settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json" + settings_path = Path(_SETTINGS_FILE) if settings_path.exists(): settings = json.loads(settings_path.read_text(encoding="utf-8")) for key in ( @@ -239,10 +245,27 @@ def _imap_connect(account: str | None = None): timeout=EMAIL_SOCKET_TIMEOUT, ) if cfg["imap_starttls"]: - conn.starttls() + try: + conn.starttls() + except Exception: + # Don't leak the open plain socket on a rejected STARTTLS. (#3174) + try: + conn.shutdown() + except Exception: + pass + raise if getattr(conn, "sock", None): conn.sock.settimeout(EMAIL_SOCKET_TIMEOUT) - conn.login(cfg["imap_user"], cfg["imap_password"]) + try: + conn.login(cfg["imap_user"], cfg["imap_password"]) + except Exception: + # A failed login otherwise orphans the connected socket; close it + # before propagating (shutdown() is the pre-auth low-level close). (#3174) + try: + conn.shutdown() + except Exception: + pass + raise return conn @@ -418,68 +441,71 @@ def _list_emails(folder="INBOX", max_results=20, unresponded_only=False, Pass unread_only=True and/or unresponded_only=True for attention scans. account selects mailbox (None = default). 
""" - conn = _imap_connect(account) - select_status, _ = conn.select(folder, readonly=True) - if select_status != "OK": - conn.logout() - raise ValueError(f"IMAP folder not found: {folder}") + conn = None + try: + conn = _imap_connect(account) + select_status, _ = conn.select(_q(folder), readonly=True) + if select_status != "OK": + raise ValueError(f"IMAP folder not found: {folder}") - if unread_only and unresponded_only: - status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)") - elif unread_only: - status, data = conn.uid("SEARCH", None, "(UNSEEN)") - elif unresponded_only: - # Was missing — unresponded_only=True (without unread_only) fell through - # to "ALL" and returned answered mail too, despite the documented - # "emails without replies" behaviour. - status, data = conn.uid("SEARCH", None, "(UNANSWERED)") - else: - # Include read too — IMAP search "ALL" returns the entire folder - status, data = conn.uid("SEARCH", None, "ALL") + if unread_only and unresponded_only: + status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)") + elif unread_only: + status, data = conn.uid("SEARCH", None, "(UNSEEN)") + elif unresponded_only: + # Was missing — unresponded_only=True (without unread_only) fell through + # to "ALL" and returned answered mail too, despite the documented + # "emails without replies" behaviour. 
+ status, data = conn.uid("SEARCH", None, "(UNANSWERED)") + else: + # Include read too — IMAP search "ALL" returns the entire folder + status, data = conn.uid("SEARCH", None, "ALL") - if status != "OK" or not data[0]: - conn.logout() - return [] + if status != "OK" or not data[0]: + return [] - uid_list = list(reversed(data[0].split()))[:max_results] - cache = _get_cached_summaries() - results = [] + uid_list = list(reversed(data[0].split()))[:max_results] + cache = _get_cached_summaries() + results = [] - for uid in uid_list: - try: - status, msg_data = conn.uid("FETCH", uid, "(RFC822.HEADER)") - if status != "OK": + for uid in uid_list: + try: + status, msg_data = conn.uid("FETCH", uid, "(RFC822.HEADER)") + if status != "OK": + continue + raw_header = msg_data[0][1] + msg = email.message_from_bytes(raw_header) + + subject = _decode_header(msg.get("Subject", "(no subject)")) + sender = _decode_header(msg.get("From", "unknown")) + date_str = msg.get("Date", "") + message_id = msg.get("Message-ID", "") + + # Parse sender name + sender_name, sender_addr = email.utils.parseaddr(sender) + sender_display = sender_name or sender_addr + + # Check cache for summary + cached = cache.get(subject, {}) + summary = cached.get("summary", "") + + results.append({ + "uid": uid.decode(), + "message_id": message_id, + "subject": subject, + "from": sender_display, + "from_address": sender_addr, + "date": date_str, + "summary": summary, + }) + except Exception: continue - raw_header = msg_data[0][1] - msg = email.message_from_bytes(raw_header) - subject = _decode_header(msg.get("Subject", "(no subject)")) - sender = _decode_header(msg.get("From", "unknown")) - date_str = msg.get("Date", "") - message_id = msg.get("Message-ID", "") - - # Parse sender name - sender_name, sender_addr = email.utils.parseaddr(sender) - sender_display = sender_name or sender_addr - - # Check cache for summary - cached = cache.get(subject, {}) - summary = cached.get("summary", "") - - results.append({ - 
"uid": uid.decode(), - "message_id": message_id, - "subject": subject, - "from": sender_display, - "from_address": sender_addr, - "date": date_str, - "summary": summary, - }) - except Exception: - continue - - conn.logout() - return results + return results + finally: + if conn: + try: conn.logout() + except Exception: pass def _result_sort_time(result: dict) -> datetime: @@ -542,7 +568,7 @@ def _search_emails(query, folders=None, max_results=20, account=None): try: for folder in folders: try: - status, _ = conn.select(folder, readonly=True) + status, _ = conn.select(_q(folder), readonly=True) if status != "OK": continue status, data = conn.uid("SEARCH", None, search_cmd) @@ -652,54 +678,55 @@ def _extract_attachment_to_disk(msg, index, target_dir): def _read_email(uid=None, message_id=None, folder="INBOX", account=None): """Read full email content by UID or message-ID. account = mailbox selector.""" cfg = _load_config(account) - conn = _imap_connect(account) - conn.select(folder, readonly=True) + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) - if message_id and not uid: - status, data = conn.uid("SEARCH", None, f'(HEADER Message-ID "{message_id}")') - if status != "OK" or not data[0]: - conn.logout() - return {"error": f"Email not found with Message-ID: {message_id}"} - uid = data[0].split()[-1] + if message_id and not uid: + status, data = conn.uid("SEARCH", None, f'(HEADER Message-ID "{message_id}")') + if status != "OK" or not data[0]: + return {"error": f"Email not found with Message-ID: {message_id}"} + uid = data[0].split()[-1] - if not uid: - conn.logout() - return {"error": "No UID or Message-ID provided"} + if not uid: + return {"error": "No UID or Message-ID provided"} - status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") - if status != "OK": - conn.logout() - return {"error": f"Failed to fetch email UID {uid}"} - if not msg_data or not msg_data[0] or not isinstance(msg_data[0], tuple) or 
len(msg_data[0]) < 2: - conn.logout() - return {"error": f"Email not found with UID {uid}"} + status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + if status != "OK": + return {"error": f"Failed to fetch email UID {uid}"} + if not msg_data or not msg_data[0] or not isinstance(msg_data[0], tuple) or len(msg_data[0]) < 2: + return {"error": f"Email not found with UID {uid}"} - raw = msg_data[0][1] - msg = email.message_from_bytes(raw) + raw = msg_data[0][1] + msg = email.message_from_bytes(raw) - subject = _decode_header(msg.get("Subject", "(no subject)")) - sender = _decode_header(msg.get("From", "unknown")) - date_str = msg.get("Date", "") - message_id_header = msg.get("Message-ID", "") - body = _extract_text(msg) - attachments = _list_attachments_from_msg(msg) + subject = _decode_header(msg.get("Subject", "(no subject)")) + sender = _decode_header(msg.get("From", "unknown")) + date_str = msg.get("Date", "") + message_id_header = msg.get("Message-ID", "") + body = _extract_text(msg) + attachments = _list_attachments_from_msg(msg) - sender_name, sender_addr = email.utils.parseaddr(sender) + sender_name, sender_addr = email.utils.parseaddr(sender) - conn.logout() - return { - "uid": uid.decode() if isinstance(uid, bytes) else str(uid), - "account": cfg.get("account_name") or cfg.get("imap_user") or "default", - "account_email": cfg.get("imap_user") or cfg.get("from_address") or "", - "account_id": cfg.get("account_id"), - "message_id": message_id_header, - "subject": subject, - "from": sender_name or sender_addr, - "from_address": sender_addr, - "date": date_str, - "body": body[:8000], - "attachments": attachments, - } + return { + "uid": uid.decode() if isinstance(uid, bytes) else str(uid), + "account": cfg.get("account_name") or cfg.get("imap_user") or "default", + "account_email": cfg.get("imap_user") or cfg.get("from_address") or "", + "account_id": cfg.get("account_id"), + "message_id": message_id_header, + "subject": subject, + "from": sender_name or 
sender_addr, + "from_address": sender_addr, + "date": date_str, + "body": body[:8000], + "attachments": attachments, + } + finally: + if conn: + try: conn.logout() + except Exception: pass def _read_email_across_accounts(uid=None, message_id=None, folder="INBOX"): @@ -768,7 +795,16 @@ def _smtp_connect(account=None, cfg=None): port, timeout=EMAIL_SOCKET_TIMEOUT, ) - conn.starttls() + try: + conn.starttls() + except Exception: + # Don't leak the open plain socket on a rejected STARTTLS. SMTP has + # no shutdown(); close() is the low-level socket close (no QUIT). (#3174) + try: + conn.close() + except Exception: + pass + raise elif security == "ssl": conn = smtplib.SMTP_SSL( cfg["smtp_host"], @@ -782,7 +818,16 @@ def _smtp_connect(account=None, cfg=None): timeout=EMAIL_SOCKET_TIMEOUT, ) if cfg["smtp_user"] and cfg["smtp_password"]: - conn.login(cfg["smtp_user"], cfg["smtp_password"]) + try: + conn.login(cfg["smtp_user"], cfg["smtp_password"]) + except Exception: + # A failed login otherwise orphans the connected socket; close it + # before propagating (SMTP has no shutdown(); close() = socket close). (#3174) + try: + conn.close() + except Exception: + pass + raise return conn @@ -827,7 +872,7 @@ def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, b imap = _imap_connect(send_account) try: sent_folder = _detect_sent_folder(imap) - append_st, append_data = imap.append(sent_folder, "\\Seen", None, msg.as_bytes()) + append_st, append_data = imap.append(_q(sent_folder), "\\Seen", None, msg.as_bytes()) if append_st == "OK" and append_data: m = re.search(rb"APPENDUID\s+\d+\s+(\d+)", append_data[0] or b"") if m: @@ -853,10 +898,15 @@ def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, b def _reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None): """Reply to an existing email by UID. 
Threads via In-Reply-To/References.""" - conn = _imap_connect(account) - conn.select(folder, readonly=True) - status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") - conn.logout() + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) + status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + finally: + if conn: + try: conn.logout() + except Exception: pass if status != "OK" or not msg_data or not msg_data[0]: return {"error": f"Failed to fetch email UID {uid}"} raw = msg_data[0][1] @@ -896,7 +946,7 @@ def _reply_to_email(uid, body, folder="INBOX", reply_all=False, account=None): def _set_flag(uid, folder, flag, add=True, account=None): """Add or remove an IMAP flag (e.g. \\Seen, \\Answered, \\Deleted).""" conn = _imap_connect(account) - conn.select(folder) + conn.select(_q(folder)) op = "+FLAGS" if add else "-FLAGS" try: status, data = conn.uid("STORE", _b(uid), op, flag) @@ -918,7 +968,7 @@ def _bulk_set_flag(uids, folder, flag, add=True, account=None): conn = _imap_connect(account) touched = [] try: - conn.select(folder) + conn.select(_q(folder)) op = "+FLAGS" if add else "-FLAGS" msg_set = ",".join(str(u) for u in uids) try: @@ -945,7 +995,7 @@ def _bulk_move(uids, source_folder, dest_folder, account=None, role: str = ""): conn = _imap_connect(account) moved = 0 try: - conn.select(source_folder) + conn.select(_q(source_folder)) dest_folder = _resolve_folder(conn, dest_folder, role or _folder_role_from_name(dest_folder)) msg_set = ",".join(str(u) for u in uids) try: @@ -956,10 +1006,11 @@ def _bulk_move(uids, source_folder, dest_folder, account=None, role: str = ""): if not existing: return 0 moved = len(existing) - status, _ = conn.uid("MOVE", _b(msg_set), dest_folder) + dest_arg = _q(dest_folder) + status, _ = conn.uid("MOVE", _b(msg_set), dest_arg) if status != "OK": # Fallback: UID copy + flag-delete + expunge - status, _ = conn.uid("COPY", _b(msg_set), dest_folder) + status, _ = conn.uid("COPY", 
_b(msg_set), dest_arg) if status != "OK": return 0 status, _ = conn.uid("STORE", _b(msg_set), "+FLAGS", "\\Deleted") @@ -976,7 +1027,7 @@ def _search_uids(folder="INBOX", criteria="UNSEEN", account=None): ALL, ANSWERED). Used to resolve selectors like all_unread → uids.""" conn = _imap_connect(account) try: - conn.select(folder, readonly=True) + conn.select(_q(folder), readonly=True) status, data = conn.uid("SEARCH", None, criteria) if status != "OK" or not data or not data[0]: return [] @@ -988,7 +1039,7 @@ def _search_uids(folder="INBOX", criteria="UNSEEN", account=None): def _move_message(uid, source_folder, dest_folder, account=None, role: str = ""): """Move a message between folders. Tries IMAP MOVE, falls back to copy+delete.""" conn = _imap_connect(account) - conn.select(source_folder) + conn.select(_q(source_folder)) try: dest_folder = _resolve_folder(conn, dest_folder, role or _folder_role_from_name(dest_folder)) try: @@ -998,11 +1049,12 @@ def _move_message(uid, source_folder, dest_folder, account=None, role: str = "") existing = _uid_fetch_rows(data) if status != "OK" or not existing: return False - status, _ = conn.uid("MOVE", _b(uid), dest_folder) + dest_arg = _q(dest_folder) + status, _ = conn.uid("MOVE", _b(uid), dest_arg) if status == "OK": return True # Fallback: UID copy + delete - status, _ = conn.uid("COPY", _b(uid), dest_folder) + status, _ = conn.uid("COPY", _b(uid), dest_arg) if status != "OK": return False status, _ = conn.uid("STORE", _b(uid), "+FLAGS", "\\Deleted") @@ -1031,16 +1083,21 @@ def _archive_email(uid, folder="INBOX", account=None): def _download_attachment(uid, index, folder="INBOX", account=None): """Extract a specific attachment to disk and return its local path.""" - conn = _imap_connect(account) - conn.select(folder, readonly=True) - status, msg_data = conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") - conn.logout() + conn = None + try: + conn = _imap_connect(account) + conn.select(_q(folder), readonly=True) + status, msg_data = 
conn.uid("FETCH", _b(uid), "(BODY.PEEK[])") + finally: + if conn: + try: conn.logout() + except Exception: pass if status != "OK": return {"error": f"Failed to fetch email UID {uid}"} raw = msg_data[0][1] msg = email.message_from_bytes(raw) - target_dir = DATA_DIR / "mail-attachments" / f"{folder}_{uid}" + target_dir = Path(MAIL_ATTACHMENTS_DIR) / f"{folder}_{uid}" filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} diff --git a/mcp_servers/image_gen_server.py b/mcp_servers/image_gen_server.py index 872ccd681..0c8d3884a 100644 --- a/mcp_servers/image_gen_server.py +++ b/mcp_servers/image_gen_server.py @@ -16,6 +16,8 @@ from mcp.types import Tool, TextContent sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) +from src.constants import GENERATED_IMAGES_DIR + server = Server("image_gen") @@ -115,14 +117,18 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: img = images[0] image_url = None + # Prefix the instance's public base URL (existing app_public_url setting) so the + # link is fully-qualified and clickable when the model echoes it. Empty = relative + # same-origin path (unchanged default). 
+ _pub_base = (get_setting("app_public_url", "") or "").rstrip("/") if img.get("b64_json"): - img_dir = Path("data/generated_images") + img_dir = Path(GENERATED_IMAGES_DIR) img_dir.mkdir(parents=True, exist_ok=True) filename = f"{uuid.uuid4().hex[:12]}.png" img_path = img_dir / filename img_path.write_bytes(base64.b64decode(img["b64_json"])) - image_url = f"/api/generated-image/{filename}" + image_url = f"{_pub_base}/api/generated-image/{filename}" # Save to gallery try: @@ -146,7 +152,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]: else: return [TextContent(type="text", text="Error: Unexpected image API response format")] - result = f"Generated image for: {prompt[:100]}\nimage_url: {image_url}\nmodel: {model_id}\nsize: {size}" + # "Direct link:" rather than an "image_url:" label — small models copied the + # label token ("image_url") into the link href, producing a broken link. + result = ( + f"Generated image for: {prompt[:100]}\n" + f"Direct link: {image_url}\n" + f"model: {model_id}\nsize: {size}" + ) return [TextContent(type="text", text=result)] except httpx.TimeoutException: diff --git a/package-lock.json b/package-lock.json index 80eac7ebf..8e0812dd9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "odysseus-ui", + "name": "odysseus", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/pyproject.toml b/pyproject.toml index 116b1376c..58161958f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,18 @@ [tool.pytest.ini_options] testpaths = ["tests"] asyncio_mode = "auto" +# Test-taxonomy markers added at collection time by tests/conftest.py. The +# stable area_* markers are declared here; the dynamic sub_ +# markers are registered before collection by pytest_configure in +# tests/conftest.py, so unknown-mark warnings still flag genuine typos outside +# the taxonomy. See tests/_taxonomy.py and tests/README.md. 
+markers = [ + "area_security: tests covering auth, owner-scope, SSRF, XSS, confinement, redaction", + "area_routes: tests covering HTTP route / API behavior", + "area_services: tests covering service-layer behavior (llm, cookbook, email, calendar, ...)", + "area_cli: tests covering CLI / script behavior", + "area_js: JavaScript / Node-backed tests", + "area_helpers: self-tests for the shared test helpers in tests/helpers/", + "area_unit: pure parser / utility tests that do not clearly belong elsewhere", + "area_uncategorized: tests not yet matched by the taxonomy (fallback)", +] diff --git a/routes/admin_wipe_routes.py b/routes/admin_wipe_routes.py index 01511c373..212e2a768 100644 --- a/routes/admin_wipe_routes.py +++ b/routes/admin_wipe_routes.py @@ -31,7 +31,7 @@ from core.database import ( CalendarEvent, CalendarCal, ) -from src.constants import DATA_DIR +from src.constants import DATA_DIR, SKILLS_DIR, SKILLS_FILE, GALLERY_DIR, GALLERY_UPLOADS_DIR logger = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def setup_admin_wipe_routes(session_manager): # Skills live as SKILL.md files under data/skills/. Drop # the entire directory; the SkillsManager re-creates the # tree on next write. - skills_dir = os.path.join(DATA_DIR, "skills") + skills_dir = SKILLS_DIR count = 0 if os.path.isdir(skills_dir): # Count SKILL.md files for the response — quick walk. @@ -115,7 +115,7 @@ def setup_admin_wipe_routes(session_manager): count += sum(1 for f in files if f == "SKILL.md") _rmtree_quiet(skills_dir) # Legacy fallback file - legacy = os.path.join(DATA_DIR, "skills.json") + legacy = SKILLS_FILE if os.path.exists(legacy): try: os.remove(legacy) @@ -151,8 +151,8 @@ def setup_admin_wipe_routes(session_manager): db.query(GalleryAlbum).delete() db.commit() # Also drop the upload dir so disk doesn't keep orphans. 
- _rmtree_quiet(os.path.join(DATA_DIR, "gallery")) - _rmtree_quiet(os.path.join(DATA_DIR, "gallery_uploads")) + _rmtree_quiet(GALLERY_DIR) + _rmtree_quiet(GALLERY_UPLOADS_DIR) return {"status": "deleted", "kind": kind, "count": count} if kind == "calendar": diff --git a/routes/api_token_routes.py b/routes/api_token_routes.py index 68d150368..97c576d15 100644 --- a/routes/api_token_routes.py +++ b/routes/api_token_routes.py @@ -155,22 +155,30 @@ def setup_api_token_routes() -> APIRouter: payload = await request.json() except Exception: payload = {} - scope_list = _normalize_scopes(payload.get("scopes")) - scopes_value = ",".join(scope_list) with get_db_session() as db: token = db.query(ApiToken).filter(ApiToken.id == token_id).first() if not token: raise HTTPException(404, "Token not found") if isinstance(payload.get("name"), str) and payload["name"].strip(): token.name = payload["name"].strip()[:MAX_NAME_LEN] - token.scopes = scopes_value + # Only touch scopes when the caller actually sent them. A partial + # update such as a rename ({"name": ...} with no "scopes" key) must + # not silently reset the token to the default scope — that dropped + # every previously granted scope. 
+ if "scopes" in payload: + token.scopes = ",".join(_normalize_scopes(payload.get("scopes"))) db.add(token) + current_scopes = [ + s.strip() + for s in (getattr(token, "scopes", "") or DEFAULT_SCOPES).split(",") + if s.strip() + ] response = { "id": token_id, "name": getattr(token, "name", ""), "owner": getattr(token, "owner", None), "token_prefix": getattr(token, "token_prefix", ""), - "scopes": scope_list, + "scopes": current_scopes, } _invalidate_cache(request) return response diff --git a/routes/auth_routes.py b/routes/auth_routes.py index 644b12d04..9379bced8 100644 --- a/routes/auth_routes.py +++ b/routes/auth_routes.py @@ -131,10 +131,8 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: return {"ok": False, "requires_totp": True, "username": username} if not auth_manager.totp_verify(username, body.totp_code): raise HTTPException(401, "Invalid 2FA code") - # All checks passed — create session - token = await asyncio.to_thread(auth_manager.create_session, username, body.password) - if not token: - raise HTTPException(401, "Invalid credentials") + # All checks passed — create session (password already verified above) + token = await asyncio.to_thread(auth_manager.create_session_trusted, username) cookie_kwargs = dict( key=SESSION_COOKIE, value=token, @@ -585,6 +583,27 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: hint = " If this is Docker Compose ntfy, set NTFY_BIND to that host/Tailscale IP and NTFY_BASE_URL to the same server URL in .env, then recreate ntfy." 
return {"ok": False, "message": f"ntfy publish to {full_url} failed: {e}.{hint}"[:500]} + if preset == "discord_webhook": + import httpx + webhook_url = (integ.get("base_url") or "").strip() + if not webhook_url: + return {"ok": False, "message": "No webhook URL set — paste the full Discord webhook URL into the Base URL field."} + payload = { + "embeds": [{ + "title": "Odysseus connectivity test", + "description": "If you see this, your Discord Webhook integration is wired up correctly.", + "color": 5793266, + }] + } + try: + async with httpx.AsyncClient(timeout=8.0) as client: + r = await client.post(webhook_url, json=payload) + if r.is_success: + return {"ok": True, "message": "Test embed sent — check your Discord channel to confirm it arrived."} + return {"ok": False, "message": f"Discord returned HTTP {r.status_code}: {r.text[:200]}"} + except Exception as e: + return {"ok": False, "message": f"Request failed: {e}"[:400]} + # All other presets: GET against a known health endpoint. # Fall back to detecting from name if preset is missing. 
health_paths = { diff --git a/routes/backup_routes.py b/routes/backup_routes.py index 2b92a1529..5ca403f81 100644 --- a/routes/backup_routes.py +++ b/routes/backup_routes.py @@ -101,24 +101,68 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo # ── Skills ── if "skills" in body and isinstance(body["skills"], list): existing = skills_manager.load_all() - existing_ids = {s.get("id") for s in existing} - existing_titles = {s.get("title", "").strip().lower() for s in existing} + existing_names = {s.get("name") for s in existing if s.get("name")} + existing_ids = {s.get("id") for s in existing if s.get("id")} + existing_titles = { + (s.get("title") or s.get("description") or "").strip().lower() + for s in existing + } added = 0 for skill in body["skills"]: - if not isinstance(skill, dict) or not skill.get("title"): + if not isinstance(skill, dict): continue - # Skip if same id or same title already exists - if skill.get("id") in existing_ids: + title = ( + skill.get("title") or skill.get("description") + or skill.get("name") or "" + ).strip() + if not title: continue - if skill["title"].strip().lower() in existing_titles: + sid = skill.get("id") or skill.get("name") + if sid and sid in existing_ids: continue - if user and not skill.get("owner"): - skill["owner"] = user - existing.append(skill) - existing_ids.add(skill.get("id")) - existing_titles.add(skill["title"].strip().lower()) + nm = skill.get("name") + if nm and nm in existing_names: + continue + if title.lower() in existing_titles: + continue + owner = skill.get("owner") + if user and not owner: + owner = user + # Skills live on disk as SKILL.md files; the old JSON-era + # skills_manager.save() no longer exists. Write each new skill + # via add_skill (source="user" skips auto-dedup — this is an + # explicit backup restore). 
+ result = skills_manager.add_skill( + title=title, + name=skill.get("name"), + description=skill.get("description"), + problem=skill.get("problem", ""), + solution=skill.get("solution", ""), + steps=skill.get("steps"), + tags=skill.get("tags"), + source="user", + teacher_model=skill.get("teacher_model"), + confidence=skill.get("confidence", 0.8), + owner=owner, + category=skill.get("category", "general"), + when_to_use=skill.get("when_to_use"), + procedure=skill.get("procedure"), + pitfalls=skill.get("pitfalls"), + verification=skill.get("verification"), + platforms=skill.get("platforms"), + requires_toolsets=skill.get("requires_toolsets"), + fallback_for_toolsets=skill.get("fallback_for_toolsets"), + status=skill.get("status", "draft"), + version=skill.get("version", "1.0.0"), + ) + if result.get("_deduped"): + continue + if result.get("name"): + existing_names.add(result["name"]) + if result.get("id"): + existing_ids.add(result["id"]) + existing_titles.add(title.lower()) added += 1 - skills_manager.save(existing) imported.append(f"{added} skills") # ── Presets ── diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py index 788a6ea30..345280528 100644 --- a/routes/calendar_routes.py +++ b/routes/calendar_routes.py @@ -1,6 +1,7 @@ """Calendar routes — local SQLite-backed calendar CRUD.""" import logging +import re import uuid from datetime import datetime, date, timedelta from typing import Optional, List @@ -12,7 +13,7 @@ from dateutil.rrule import rrulestr from core.database import SessionLocal, CalendarCal, CalendarEvent from src.auth_helpers import require_user -from src.upload_limits import read_upload_limited +from src.upload_limits import read_upload_limited, ICS_MAX_BYTES logger = logging.getLogger(__name__) @@ -100,6 +101,15 @@ def _ics_escape(text: str) -> str: ) +def _safe_ics_filename(name: str) -> str: + """Return a conservative .ics filename safe for Content-Disposition.""" + stem = name if isinstance(name, str) else "" + stem = 
re.sub(r"[^A-Za-z0-9._-]", "_", stem).strip("._-") + if not stem: + stem = "calendar" + return f"{stem[:128]}.ics" + + def _resolve_base_uid(uid: str) -> str: """Extract the base series UID from a compound occurrence UID. @@ -248,6 +258,17 @@ def parse_due_for_user(s: str) -> str: if t is not None: return base.replace(hour=t[0], minute=t[1]).isoformat() + # Time-first: "3pm today", "11pm today", "9am tomorrow" + m = _re.match(r'^(.+?)\s+(today|tonight|tomorrow|tmrw|yesterday)$', lower) + if m: + time_part, word = m.group(1).strip(), m.group(2) + base = today + if word in ("tomorrow", "tmrw"): base = today + _td(days=1) + elif word == "yesterday": base = today - _td(days=1) + t = _parse_time(time_part) + if t is not None: + return base.replace(hour=t[0], minute=t[1]).isoformat() + m = _re.match(r'^in\s+(\d+)\s*(hour|hr|minute|min|day)s?\s*$', lower) if m: n = int(m.group(1)); unit = m.group(2) @@ -399,7 +420,17 @@ def _parse_dt(s: str) -> datetime: # Last resort: dateutil's fuzzy parser try: from dateutil import parser as _du - return _du.parse(s) + parsed = _du.parse(s) + # Strip tz like every other return path above — this function's + # contract is naive datetimes (CalendarEvent.dtstart is naive). An + # offset-bearing non-ISO input (e.g. RFC-2822 "Mon, 05 Jan 2026 + # 14:00:00 +0900") otherwise leaked tz-aware into the naive column and + # crashed read-back comparisons in _expand_rrule with "can't compare + # offset-naive and offset-aware datetimes". 
+ if parsed.tzinfo is not None: + from datetime import timezone as _tz + return parsed.astimezone(_tz.utc).replace(tzinfo=None) + return parsed except Exception: raise ValueError(f"could not parse datetime: {s!r}") @@ -440,6 +471,9 @@ def _event_to_dict(ev: CalendarEvent) -> dict: # ── Recurrence expansion ── +_RRULE_EXPANSION_LIMIT = 1000 + + def _expand_rrule( ev: CalendarEvent, start: datetime, end: datetime ) -> List[dict]: @@ -462,6 +496,7 @@ def _expand_rrule( d = _event_to_dict(ev) d["is_recurrence"] = False d["series_uid"] = ev.uid + d["truncated"] = False return [d] # Parse the rrule, applying it to the base dtstart. @@ -487,6 +522,7 @@ def _expand_rrule( d = _event_to_dict(ev) d["is_recurrence"] = False d["series_uid"] = ev.uid + d["truncated"] = False # Malformed RRULE rows are fetched by the recurring SQL branch # with only dtstart < end_dt — the base event may not actually # overlap the window. Only return if it does. @@ -499,22 +535,26 @@ def _expand_rrule( # (matching non-recurring overlap semantics: dtstart < end AND # dtend > start). expand_start = start - duration - occurrences = rule.between(expand_start, end, inc=True) - if not occurrences: - return [] - results = [] + truncated = False base = _event_to_dict(ev) - for occ_start in occurrences: + for occ_start in rule.xafter(expand_start, inc=True): + if occ_start >= end: + break + occ_end = occ_start + duration # Overlap filter: occurrence must intersect [start, end). # This enforces exclusive-end semantics (occ_start >= end is # excluded) and includes multi-day crossings (occ_end > start). 
- if occ_start >= end or occ_end <= start: + if occ_end <= start: continue + if len(results) >= _RRULE_EXPANSION_LIMIT: + truncated = True + break + # Build the compound uid: {base_uid}::{date} or ::{datetime} if ev.all_day: occ_uid = f"{ev.uid}::{occ_start.strftime('%Y-%m-%d')}" @@ -525,6 +565,7 @@ def _expand_rrule( d["uid"] = occ_uid d["series_uid"] = ev.uid d["is_recurrence"] = True + d["truncated"] = False if ev.all_day: d["dtstart"] = occ_start.strftime("%Y-%m-%d") @@ -537,6 +578,10 @@ def _expand_rrule( results.append(d) + if truncated: + for d in results: + d["truncated"] = True + return results @@ -545,72 +590,178 @@ def _expand_rrule( def setup_calendar_routes() -> APIRouter: router = APIRouter(prefix="/api/calendar", tags=["calendar"]) - # CalDAV connect form (Integrations → Calendar). Storage is local - # SQLite; sync (src/caldav_sync.py) pulls remote events into it on - # calendar open and periodically via the scheduler. + # ── CalDAV multi-account helpers ───────────────────────────────────────── + + def _get_caldav_accounts(owner: str) -> list: + from src.caldav_sync import _load_caldav_accounts + return _load_caldav_accounts(owner) + + def _save_caldav_accounts(owner: str, accounts: list) -> None: + from routes.prefs_routes import _load_for_user, _save_for_user + prefs = _load_for_user(owner) or {} + prefs["caldav_accounts"] = accounts + prefs.pop("caldav", None) + _save_for_user(owner, prefs) + + # ── CalDAV config routes (backward-compat single-account API) ──────────── + @router.get("/config") async def get_config(request: Request): + """Legacy single-account endpoint — returns the first configured account.""" owner = _require_user(request) - from routes.prefs_routes import _load_for_user - cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} - caldav_password = cfg.get("password") or "" - if caldav_password: + accounts = _get_caldav_accounts(owner) + if not accounts: + return {"url": "", "username": "", "password": "", "has_password": 
False, "local": True} + first = accounts[0] + pw = first.get("password") or "" + has_pw = False + if pw: try: from src.secret_storage import decrypt - caldav_password = decrypt(caldav_password) + has_pw = bool(decrypt(pw)) except Exception: - pass - # Surface url+username but never hand the password back to the - # client — saved-state UI shouldn't leak the credential. + has_pw = bool(pw) return { - "url": cfg.get("url", "") or "", - "username": cfg.get("username", "") or "", + "url": first.get("url", "") or "", + "username": first.get("username", "") or "", "password": "", - "has_password": bool(caldav_password), - "local": not bool(cfg.get("url")), + "has_password": has_pw, + "local": not bool(first.get("url")), } @router.post("/config") async def save_config(request: Request): + """Legacy single-account endpoint — upserts the first account.""" owner = _require_user(request) - from routes.prefs_routes import _load_for_user, _save_for_user try: body = await request.json() except Exception: body = {} - prefs = _load_for_user(owner) or {} - cfg = dict(prefs.get("caldav") or {}) - # Empty url => clear the whole entry (treat as "remove integration"). + accounts = _get_caldav_accounts(owner) if not (body.get("url") or "").strip(): - prefs.pop("caldav", None) - _save_for_user(owner, prefs) + _save_caldav_accounts(owner, []) return {"ok": True, "cleared": True} from src.caldav_sync import validate_caldav_url try: - cfg["url"] = validate_caldav_url(body.get("url", "")) + validated_url = validate_caldav_url(body.get("url", "")) except ValueError as e: raise HTTPException(400, str(e)) - cfg["username"] = (body.get("username") or "").strip() - # Preserve the stored password when the client sends an empty - # one (edit form re-submitted without re-typing the password). - # cfg already holds the existing (already-encrypted) password from - # prefs, so we only touch it when a new password is supplied — - # re-encrypting the stored value would double-encrypt it. 
+ if accounts: + acc = dict(accounts[0]) + else: + import uuid as _uuid + acc = {"id": str(_uuid.uuid4()), "label": "CalDAV"} + acc["url"] = validated_url + acc["username"] = (body.get("username") or "").strip() if body.get("password"): from src.secret_storage import encrypt - cfg["password"] = encrypt(body["password"]) - prefs["caldav"] = cfg - _save_for_user(owner, prefs) + acc["password"] = encrypt(body["password"]) + new_accounts = [acc] + (accounts[1:] if len(accounts) > 1 else []) + _save_caldav_accounts(owner, new_accounts) + return {"ok": True} + + # ── CalDAV multi-account CRUD ───────────────────────────────────────────── + + @router.get("/config/accounts") + async def list_caldav_accounts(request: Request): + """Return all configured CalDAV accounts (passwords never returned).""" + owner = _require_user(request) + accounts = _get_caldav_accounts(owner) + safe = [] + for acc in accounts: + pw = acc.get("password") or "" + has_pw = False + if pw: + try: + from src.secret_storage import decrypt + has_pw = bool(decrypt(pw)) + except Exception: + has_pw = bool(pw) + safe.append({ + "id": acc.get("id", ""), + "label": acc.get("label", "") or acc.get("url", ""), + "url": acc.get("url", "") or "", + "username": acc.get("username", "") or "", + "has_password": has_pw, + }) + return {"accounts": safe} + + @router.post("/config/accounts") + async def add_caldav_account(request: Request): + """Add a new CalDAV account.""" + import uuid as _uuid + owner = _require_user(request) + try: + body = await request.json() + except Exception: + body = {} + from src.caldav_sync import validate_caldav_url + try: + url = validate_caldav_url(body.get("url", "")) + except ValueError as e: + raise HTTPException(400, str(e)) + if not body.get("password"): + raise HTTPException(400, "Password is required") + from src.secret_storage import encrypt + new_acc = { + "id": str(_uuid.uuid4()), + "label": (body.get("label") or "").strip() or "CalDAV", + "url": url, + "username": 
(body.get("username") or "").strip(), + "password": encrypt(body["password"]), + } + accounts = _get_caldav_accounts(owner) + accounts.append(new_acc) + _save_caldav_accounts(owner, accounts) + return {"ok": True, "id": new_acc["id"]} + + @router.put("/config/accounts/{account_id}") + async def update_caldav_account(account_id: str, request: Request): + """Update an existing CalDAV account by id.""" + owner = _require_user(request) + try: + body = await request.json() + except Exception: + body = {} + accounts = _get_caldav_accounts(owner) + idx = next((i for i, a in enumerate(accounts) if a.get("id") == account_id), None) + if idx is None: + raise HTTPException(404, "Account not found") + acc = dict(accounts[idx]) + if body.get("url"): + from src.caldav_sync import validate_caldav_url + try: + acc["url"] = validate_caldav_url(body["url"]) + except ValueError as e: + raise HTTPException(400, str(e)) + if body.get("label") is not None: + acc["label"] = (body.get("label") or "").strip() or "CalDAV" + if body.get("username") is not None: + acc["username"] = (body.get("username") or "").strip() + if body.get("password"): + from src.secret_storage import encrypt + acc["password"] = encrypt(body["password"]) + accounts[idx] = acc + _save_caldav_accounts(owner, accounts) + return {"ok": True} + + @router.delete("/config/accounts/{account_id}") + async def delete_caldav_account(account_id: str, request: Request): + """Remove a CalDAV account by id.""" + owner = _require_user(request) + accounts = _get_caldav_accounts(owner) + new_accounts = [a for a in accounts if a.get("id") != account_id] + if len(new_accounts) == len(accounts): + raise HTTPException(404, "Account not found") + _save_caldav_accounts(owner, new_accounts) return {"ok": True} @router.post("/test") async def test_connection(request: Request): - """Actually probe the configured CalDAV server with a PROPFIND - request (the same handshake every CalDAV client uses). 
Accepts - an optional {url, username, password} body so the user can test - a configuration BEFORE saving it; falls back to the stored - creds otherwise. Returns {ok, error?} with a useful message on - failure (status code, auth issue, network error).""" + """Probe a CalDAV server with a PROPFIND. Accepts an optional body: + {url, username, password} to test before saving, or {account_id} to + test an already-saved account. Falls back to the first saved account + when nothing is provided.""" owner = _require_user(request) try: body = await request.json() @@ -620,19 +771,24 @@ def setup_calendar_routes() -> APIRouter: user = (body.get("username") or "").strip() pw = body.get("password") or "" if not (url and user and pw): - # Fall back to saved settings for this user. - from routes.prefs_routes import _load_for_user - cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {} - url = url or (cfg.get("url") or "") - user = user or (cfg.get("username") or "") - if not pw: - pw = cfg.get("password") or "" - if pw: - try: - from src.secret_storage import decrypt - pw = decrypt(pw) - except Exception: - pass + # Look up a saved account: by id if supplied, else first account. 
+ accounts = _get_caldav_accounts(owner) + acc = None + if body.get("account_id"): + acc = next((a for a in accounts if a.get("id") == body["account_id"]), None) + if acc is None and accounts: + acc = accounts[0] + if acc: + url = url or (acc.get("url") or "") + user = user or (acc.get("username") or "") + if not pw: + pw = acc.get("password") or "" + if pw: + try: + from src.secret_storage import decrypt + pw = decrypt(pw) + except Exception: + pass if not (url and user and pw): return {"ok": False, "error": "Missing URL, username, or password"} from src.caldav_sync import validate_caldav_url @@ -695,6 +851,28 @@ def setup_calendar_routes() -> APIRouter: from src.caldav_sync import sync_caldav return await sync_caldav(owner) + @router.delete("/calendars/{cal_id}") + async def delete_calendar(cal_id: str, request: Request): + owner = _require_user(request) + db = SessionLocal() + try: + cal = db.query(CalendarCal).filter( + CalendarCal.id == cal_id, + CalendarCal.owner == owner, + ).first() + if not cal: + raise HTTPException(404, "Calendar not found") + db.delete(cal) + db.commit() + return {"ok": True} + except HTTPException: + raise + except Exception as e: + logger.error("Failed to delete calendar %s: %s", cal_id, e) + raise HTTPException(500, "Failed to delete calendar") + finally: + db.close() + @router.get("/calendars") async def list_calendars(request: Request): owner = _require_user(request) @@ -703,7 +881,7 @@ def setup_calendar_routes() -> APIRouter: _ensure_default_calendar(db, owner) cals = db.query(CalendarCal).filter(CalendarCal.owner == owner).all() return {"calendars": [ - {"name": c.name, "href": c.id, "color": c.color} + {"name": c.name, "href": c.id, "color": c.color, "source": c.source} for c in cals ]} except HTTPException: @@ -766,8 +944,12 @@ def setup_calendar_routes() -> APIRouter: expanded.extend(_expand_rrule(e, start_dt, end_dt)) # Sort by occurrence start time for consistent frontend ordering. 
+ truncated = any(e.get("truncated") for e in expanded) expanded.sort(key=lambda d: d["dtstart"]) - return {"events": expanded} + response: dict = {"events": expanded} + if truncated: + response["truncated"] = True + return response except HTTPException: raise except Exception as e: @@ -988,9 +1170,9 @@ def setup_calendar_routes() -> APIRouter: finally: db.close() - # 10 MB hard cap on ICS upload. Loading the whole file into memory is - # unavoidable with python-icalendar, so an unbounded upload would OOM. - _ICS_MAX_BYTES = 10 * 1024 * 1024 + # Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole + # file into memory is unavoidable with python-icalendar, so an unbounded + # upload would OOM. @router.post("/import") async def import_ics(request: Request, file: UploadFile = File(...), calendar_name: str = ""): @@ -1000,7 +1182,7 @@ def setup_calendar_routes() -> APIRouter: owner = _require_user(request) db = SessionLocal() try: - content = await read_upload_limited(file, _ICS_MAX_BYTES, "ICS file") + content = await read_upload_limited(file, ICS_MAX_BYTES, "ICS file") try: cal_data = iCal.from_ical(content) except Exception as e: @@ -1168,11 +1350,14 @@ def setup_calendar_routes() -> APIRouter: lines.append("END:VCALENDAR") ics_data = "\r\n".join(lines) - safe_name = cal.name.replace(" ", "_").replace("/", "_") + download_name = _safe_ics_filename(cal.name) return Response( content=ics_data, media_type="text/calendar", - headers={"Content-Disposition": f'attachment; filename="{safe_name}.ics"'}, + headers={ + "Content-Disposition": f'attachment; filename="{download_name}"', + "X-Content-Type-Options": "nosniff", + }, ) except HTTPException: raise @@ -1194,7 +1379,7 @@ def setup_calendar_routes() -> APIRouter: "tomorrow", "next Tuesday", "in 30 minutes" resolve correctly. Uses the "utility" endpoint (small / fast model) to keep latency low. 
""" - _require_user(request) + owner = _require_user(request) from src.endpoint_resolver import resolve_endpoint from src.llm_core import llm_call_async from src.text_helpers import strip_think @@ -1220,9 +1405,9 @@ def setup_calendar_routes() -> APIRouter: if tz_hint: set_user_tz_name(tz_hint) - url, model, headers = resolve_endpoint("utility") + url, model, headers = resolve_endpoint("utility", owner=owner or None) if not url: - url, model, headers = resolve_endpoint("default") + url, model, headers = resolve_endpoint("default", owner=owner or None) if not url or not model: return {"ok": False, "error": "No LLM endpoint configured"} diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index 0929b699d..0b1c5d8ba 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -75,7 +75,7 @@ def _enforce_chat_privileges(request, sess) -> None: allowlist, or HTTPException(429) if the user has hit their daily message cap. No-op for unauthenticated callers or when auth_manager is absent (single-user mode). Admins receive ADMIN_PRIVILEGES from get_privileges, - which means empty allowed_models / zero cap → no-op for them. + which means unrestricted allowed_models / zero cap -> no-op for them. """ try: user = get_current_user(request) @@ -88,8 +88,18 @@ def _enforce_chat_privileges(request, sess) -> None: return privs = auth_manager.get_privileges(user) or {} - allowed = privs.get("allowed_models") or [] - if allowed and sess.model and sess.model not in allowed: + + # Explicit "block everything" sentinel takes precedence over the + # allowlist — it's the only way to distinguish "user clicked [None]" + # (block all) from "user clicked [All]" (no restriction), since both + # otherwise produce an empty `allowed_models` list. 
+ if privs.get("block_all_models"): + raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.") + + allowed_raw = privs.get("allowed_models") + allowed = allowed_raw if isinstance(allowed_raw, list) else [] + restricted = bool(privs.get("allowed_models_restricted")) or bool(allowed) + if restricted and sess.model and sess.model not in allowed: raise HTTPException(403, f"Your account is not allowed to use model '{sess.model}'.") cap = int(privs.get("max_messages_per_day") or 0) @@ -194,14 +204,26 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: Returns {"model": ..., "endpoint_url": ..., "endpoint_name": ...} or None. """ import requests as _req - from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base + from src.endpoint_resolver import ( + build_chat_url, + build_headers, + build_models_url, + normalize_base, + resolve_endpoint_runtime, + ) + from src.chatgpt_subscription import is_chatgpt_subscription_base current_url = sess.endpoint_url or "" + owner = getattr(sess, "owner", None) db = SessionLocal() try: - endpoints = db.query(ModelEndpoint).filter( + q = db.query(ModelEndpoint).filter( ModelEndpoint.is_enabled == True - ).all() + ) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() finally: db.close() @@ -210,26 +232,33 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: # Skip current endpoint if current_url and base in current_url: continue - # Quick ping - ping_url = build_models_url(base) - headers = build_headers(ep.api_key, base) try: - r = _req.get(ping_url, headers=headers, timeout=5) - r.raise_for_status() - data = r.json() - models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] - if not models: - models = [ - m.get("name") or m.get("model") - for m in (data.get("models") or []) - if m.get("name") or m.get("model") - ] + base, api_key = 
resolve_endpoint_runtime(ep, owner=owner) + except Exception: + continue + ping_url = build_models_url(base) + headers = build_headers(api_key, base) + try: + if ping_url: + r = _req.get(ping_url, headers=headers, timeout=5) + r.raise_for_status() + data = r.json() + models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not models: + models = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] + else: + models = json.loads(ep.cached_models or "[]") if not models: continue # Found a working endpoint — update session new_model = models[0] chat_url = build_chat_url(base) - new_headers = build_headers(ep.api_key, base) + new_headers = build_headers(api_key, base) + persisted_headers = {} if is_chatgpt_subscription_base(base) else new_headers sess.model = new_model sess.endpoint_url = chat_url @@ -241,7 +270,7 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: _db.query(DBSession).filter(DBSession.id == session_id).update({ "model": new_model, "endpoint_url": chat_url, - "headers": json.dumps(new_headers), + "headers": persisted_headers, }) _db.commit() finally: @@ -275,11 +304,16 @@ def extract_preset(chat_handler, preset_id) -> PresetInfo: async def preprocess( chat_handler, message, att_ids, sess, auto_opened_docs: Optional[list] = None, + allow_tool_preprocessing: bool = True, ) -> PreprocessedMessage: """Run chat_handler.preprocess_message and wrap the result.""" enhanced, user_content, text_ctx, yt_transcripts, att_meta = ( await chat_handler.preprocess_message( - message, att_ids, sess, auto_opened_docs=auto_opened_docs + message, + att_ids, + sess, + auto_opened_docs=auto_opened_docs, + allow_tool_preprocessing=allow_tool_preprocessing, ) ) return PreprocessedMessage( @@ -329,16 +363,26 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: return False +def _has_auth_keys(headers) -> bool: + """True if a headers dict carries an 
Authorization/x-api-key entry.""" + return isinstance(headers, dict) and any( + k.lower() in ('authorization', 'x-api-key') for k in headers + ) + + def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None): """Ensure session has auth headers — resolve from endpoint DB if missing.""" - has_auth = sess.headers and isinstance(sess.headers, dict) and any( - k.lower() in ('authorization', 'x-api-key') for k in sess.headers - ) - if has_auth: + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(sess, "endpoint_url", "") or "") + except Exception: + is_chatgpt_subscription = False + has_auth = _has_auth_keys(sess.headers) + if has_auth and not is_chatgpt_subscription: return try: - from src.endpoint_resolver import build_headers, normalize_base + from src.endpoint_resolver import build_headers, resolve_endpoint_runtime db = SessionLocal() try: target_url = getattr(sess, "endpoint_url", "") or "" @@ -354,10 +398,30 @@ def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None): for ep in q.all(): if not _session_url_matches_endpoint(target_url, ep.base_url or ""): continue - if not ep.api_key: + try: + base, api_key = resolve_endpoint_runtime(ep, owner=owner) + except Exception as e: + logger.warning("Failed to resolve provider auth for session %s: %s", session_id, e) + return + if not api_key: + # No usable key (e.g. ChatGPT Subscription needs re-auth). + return + sess.headers = build_headers(api_key, base) + if is_chatgpt_subscription: + # The bearer is short-lived and re-resolved per request, so it + # stays request-local and is never written to the plaintext + # sessions.headers column. Proactively strip any bearer an + # older code path may have persisted so it does not linger. 
+ stale_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + stale_q = stale_q.filter(DBSession.owner == owner) + stored = stale_q.first() + if stored is not None and _has_auth_keys(stored.headers): + stale_q.update({"headers": {}}) + db.commit() + logger.info(f"Cleared persisted ChatGPT Subscription bearer from session {session_id}") + logger.debug(f"Resolved request-local ChatGPT Subscription auth for session {session_id}") return - base = normalize_base(ep.base_url or "") - sess.headers = build_headers(ep.api_key, base) update_q = db.query(DBSession).filter(DBSession.id == session_id) if owner: update_q = update_q.filter(DBSession.owner == owner) @@ -401,7 +465,12 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]: db = SessionLocal() try: - endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all() + q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True) + owner = getattr(sess, "owner", None) + if owner: + from src.auth_helpers import owner_filter + q = owner_filter(q, ModelEndpoint, owner) + endpoints = q.all() for ep in endpoints: try: if normalize_base(getattr(ep, "base_url", "") or "") != session_base: @@ -448,6 +517,7 @@ async def build_chat_context( webhook_manager=None, use_enhanced_message: bool = False, agent_mode: bool = False, + allow_tool_preprocessing: bool = True, ) -> ChatContext: """Build the full context (preface + messages) for an LLM call. @@ -465,6 +535,7 @@ async def build_chat_context( preprocessed = await preprocess( chat_handler, message, att_ids or [], sess, auto_opened_docs=auto_opened_docs, + allow_tool_preprocessing=allow_tool_preprocessing, ) # Add user message to history @@ -483,6 +554,9 @@ async def build_chat_context( # Skills injection respects its own enable toggle (mirrors memory_enabled). # When off, the "Available skills" index is not added to the prompt. 
skills_enabled = not incognito and uprefs.get("skills_enabled", True) + if not allow_tool_preprocessing: + mem_enabled = False + skills_enabled = False logger.debug( "Memory enabled=%s for user=%s (incognito=%s, no_memory=%s, pref=%s)", mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"), @@ -490,11 +564,11 @@ async def build_chat_context( # Use RAG? use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True - if incognito: + if incognito or not allow_tool_preprocessing: use_rag_val = False # If pre-fetched search context was provided (compare mode), skip live web search - skip_web = bool(search_context) + skip_web = bool(search_context) or not allow_tool_preprocessing # Build context preface # The stream path uses enhanced_message (with CoT/preprocessing applied), @@ -521,7 +595,7 @@ async def build_chat_context( used_memories = getattr(chat_processor, '_last_used_memories', []) # Inject pre-fetched search context (compare mode) - if search_context: + if search_context and allow_tool_preprocessing: preface.append(untrusted_context_message("prefetched search context", search_context)) # YouTube transcripts @@ -530,7 +604,11 @@ async def build_chat_context( # Normalize model ID. Prefer cached endpoint models so group chat does not # re-hit slow local /models endpoints on every participant turn. 
- norm = _normalize_model_id_from_cache(sess) or normalize_model_id(sess.endpoint_url, sess.model) + norm = _normalize_model_id_from_cache(sess) or normalize_model_id( + sess.endpoint_url, + sess.model, + owner=getattr(sess, "owner", None), + ) if norm: sess.model = norm @@ -539,7 +617,7 @@ async def build_chat_context( # Auto-compact messages, context_length, was_compacted = await maybe_compact( - sess, sess.endpoint_url, sess.model, messages, sess.headers, + sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user, ) messages = trim_for_context(messages, context_length) @@ -772,7 +850,19 @@ def save_assistant_response( ): """Add assistant response to session history. In incognito mode, keeps in-memory context but skips DB persistence.""" md = dict(last_metrics) if last_metrics else {} - md["model"] = sess.model + def _model_value(value) -> str: + if value is None: + return "" + if not isinstance(value, str): + value = str(value) + return value.strip() + + requested_model = _model_value(md.get("requested_model") or md.get("selected_model") or getattr(sess, "model", "")) + actual_model = _model_value(md.get("model") or md.get("actual_model") or requested_model) + if requested_model: + md["requested_model"] = requested_model + if actual_model: + md["model"] = actual_model if character_name: md["character_name"] = character_name if web_sources: @@ -841,12 +931,13 @@ def run_post_response_tasks( skills_manager=None, owner: str = None, extract_skills: bool = True, + allow_background_extraction: bool = True, ): """Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.""" # Memory extraction — only every 4th message pair to avoid excess LLM calls _msg_count = len(sess.history) if hasattr(sess, 'history') else 0 _should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0) - if not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True): + if allow_background_extraction and 
not incognito and not compare_mode and _should_extract and uprefs.get("auto_memory", True): from services.memory.memory_extractor import extract_and_store from src.task_endpoint import resolve_task_endpoint t_url, t_model, t_headers = resolve_task_endpoint( @@ -873,6 +964,7 @@ def run_post_response_tasks( ) if ( extract_skills + and allow_background_extraction and auto_skills_enabled and not incognito and not compare_mode diff --git a/routes/chat_routes.py b/routes/chat_routes.py index a18a1a62e..a718d3fbe 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -20,6 +20,7 @@ from src import agent_runs from src.model_context import estimate_tokens from src.chat_helpers import coerce_message_and_session from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url +from src.session_search import search_session_messages from src.prompt_security import untrusted_context_message from core.exceptions import SessionNotFoundError from src.auth_helpers import get_current_user @@ -39,6 +40,7 @@ from routes.chat_helpers import ( _enforce_chat_privileges, ) from src.action_intents import classify_tool_intent as _classify_tool_intent +from src.tool_policy import build_effective_tool_policy logger = logging.getLogger(__name__) @@ -167,13 +169,20 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None Covers the window between endpoint setup and the first chat send: the picker showed a model in the dropdown but the session record never got written (Issue #587 — UI uses the cached endpoint list, not s.model). - Without this, we'd POST the upstream with model="" and get a generic - 401/503 instead of using the model the user already picked. - - Returns True iff sess.model was repaired. + For ChatGPT Subscription, also repairs stale OpenAI API model names such as + ``gpt-5`` that are not accepted by the Codex-backed ChatGPT account route. 
""" - if getattr(sess, "model", None): - return False + current_model = (getattr(sess, "model", "") or "").strip() + endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip() + is_chatgpt_subscription = False + if current_model: + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(endpoint_url) + if not is_chatgpt_subscription: + return False + except Exception: + return False db = SessionLocal() try: # Prefer the endpoint whose base URL matches the session — we know the @@ -192,16 +201,51 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None break if not ep: return False + if not is_chatgpt_subscription: + try: + from src.chatgpt_subscription import is_chatgpt_subscription_base + is_chatgpt_subscription = is_chatgpt_subscription_base(getattr(ep, "base_url", "") or endpoint_url) + except Exception: + is_chatgpt_subscription = False try: cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or []) except Exception: cached = [] if not cached: + visible = [] + else: + try: + visible = _visible_models(cached, getattr(ep, "hidden_models", None)) + except Exception: + visible = cached + if current_model and current_model in {str(item).strip() for item in visible}: return False - try: - visible = _visible_models(cached, getattr(ep, "hidden_models", None)) - except Exception: - visible = cached + if is_chatgpt_subscription: + live_models = [] + if getattr(ep, "provider_auth_id", None): + try: + from src.chatgpt_subscription import fetch_available_models + from src.endpoint_resolver import resolve_endpoint_runtime + _base, api_key = resolve_endpoint_runtime(ep, owner=owner) + if api_key: + live_models = fetch_available_models(api_key) + if live_models: + ep.cached_models = json.dumps(live_models) + db.commit() + except Exception: + live_models = [] + # ChatGPT Subscription recovery must use the live Codex catalog. 
+ # Cached rows are only trusted above to avoid revalidating a model + # that is already present in the visible picker list. + cached = live_models + if not cached: + return False + try: + visible = _visible_models(cached, getattr(ep, "hidden_models", None)) + except Exception: + visible = cached + if current_model and current_model in {str(item).strip() for item in visible}: + return False if not visible: return False model = visible[0] @@ -211,14 +255,17 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None # Persist so the next request, websocket reconnect, or page reload # picks up the same model (we'd otherwise re-pick on every send # and silently switch on the user if the cached order shifts). - db_session = db.query(DBSession).filter(DBSession.id == session_id).first() + db_session_q = db.query(DBSession).filter(DBSession.id == session_id) + if owner: + db_session_q = db_session_q.filter(DBSession.owner == owner) + db_session = db_session_q.first() if db_session: db_session.model = model db_session.updated_at = datetime.utcnow() db.commit() sess.model = model logger.info( - "Recovered empty session model for %s — picked %r from endpoint %s", + "Recovered session model for %s — picked %r from endpoint %s", session_id, model, ep.id, ) return True @@ -304,8 +351,13 @@ def setup_chat_routes( # non-streaming path can't be used to bypass). 
_enforce_chat_privileges(request, sess) + tool_policy = build_effective_tool_policy(last_user_message=message) + allow_tool_preprocessing = not tool_policy.block_all_tool_calls + # Inline memory command - memory_response = await chat_handler.handle_memory_command(sess, message) + memory_response = None + if not tool_policy.blocks("manage_memory"): + memory_response = await chat_handler.handle_memory_command(sess, message) if memory_response: return {"response": memory_response} @@ -319,10 +371,15 @@ def setup_chat_routes( use_web=use_web, time_filter=time_filter, webhook_manager=webhook_manager, + allow_tool_preprocessing=allow_tool_preprocessing, ) # Research injection - if use_research: + research_blocked_by_policy = ( + tool_policy.blocks("trigger_research") + or tool_policy.blocks("manage_research") + ) + if use_research and not research_blocked_by_policy: try: _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess) research_ctx = await research_handler.call_research_service( @@ -357,6 +414,7 @@ def setup_chat_routes( ctx.uprefs, memory_manager, memory_vector, webhook_manager, character_name=ctx.preset.character_name, owner=ctx.user, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) return {"response": reply} @@ -394,6 +452,7 @@ def setup_chat_routes( search_context = form_data.get("search_context") # pre-fetched web search results (compare mode) compare_mode = str(form_data.get("compare_mode", "")).lower() == "true" incognito = str(form_data.get("incognito", "")).lower() == "true" + plan_mode = str(form_data.get("plan_mode", "")).lower() == "true" chat_mode = str(form_data.get("mode", "")).lower() # 'chat' or 'agent' # Workspace: confine the agent's file/shell tools to this folder. Validate # it's a real directory; ignore (no confinement) otherwise. 
@@ -401,6 +460,17 @@ def setup_chat_routes( if workspace: _ws_real = os.path.realpath(os.path.expanduser(workspace)) workspace = _ws_real if os.path.isdir(_ws_real) else "" + # Plan mode is a modifier on agent mode — it only makes sense with tools. + if plan_mode: + chat_mode = "agent" + # An approved plan being EXECUTED: the frontend sends the checklist back + # on each turn so we can pin it in context. This way a long plan on a + # weak model survives history truncation — the agent can always re-read + # the plan. Ignored while still proposing (plan_mode on). Capped so a + # huge plan can't blow the prompt. + approved_plan = "" + if not plan_mode: + approved_plan = (form_data.get("approved_plan") or "").strip()[:8192] # Did the USER explicitly pick agent mode? (vs. us auto-escalating # below). Skill extraction should only learn from real agent sessions, # not chats we quietly promoted for a notes/calendar intent. @@ -479,11 +549,6 @@ def setup_chat_routes( do_research = True logger.info(f"Session {session} in research_pending — auto-triggering research") - # Persist session mode (research > agent > chat) - _effective_mode = 'research' if do_research else (chat_mode or 'chat') - if _effective_mode in ('agent', 'research', 'chat'): - set_session_mode(session, _effective_mode) - att_ids = [] if body and isinstance(body.get("attachments"), list): att_ids = [str(x) for x in body["attachments"]] @@ -494,6 +559,10 @@ def setup_chat_routes( pass no_memory = str(form_data.get("no_memory", "")).lower() == "true" + pre_context_tool_policy = build_effective_tool_policy( + last_user_message=message, + ) + allow_tool_preprocessing = not pre_context_tool_policy.block_all_tool_calls # Build shared context (stream path uses enhanced_message for context preface) ctx = await build_chat_context( @@ -515,6 +584,7 @@ def setup_chat_routes( # manage_skills (agent mode). In plain chat or incognito the # index would be useless / unwanted noise. 
agent_mode=(chat_mode == "agent"), + allow_tool_preprocessing=allow_tool_preprocessing, ) _research_flags = {"do": do_research} # Mutable container for generator scope @@ -659,6 +729,32 @@ def setup_chat_routes( if chat_mode == 'chat': disabled_tools.update({"bash", "python", "read_file", "write_file", "web_search", "web_fetch", "search_chats", "manage_tasks"}) + # Plan mode: investigate read-only, propose a plan, don't mutate. Block + # every tool not on the read-only allowlist. (stream_agent_loop enforces + # this again + drops MCP, so this is belt-and-suspenders.) + if plan_mode: + from src.tool_security import plan_mode_disabled_tools + disabled_tools.update(plan_mode_disabled_tools()) + + tool_policy = build_effective_tool_policy( + disabled_tools=disabled_tools, + last_user_message=message, + ) + disabled_tools = tool_policy.all_disabled_names() + research_blocked_by_policy = bool( + tool_policy.blocks("trigger_research") + or tool_policy.blocks("manage_research") + ) + effective_do_research = bool( + do_research and _research_flags["do"] and not research_blocked_by_policy + ) + + # Persist session mode after policy/privilege gates so blocked research + # turns remain ordinary chat/agent streams and saved messages. + _effective_mode = 'research' if effective_do_research else (chat_mode or 'chat') + if _effective_mode in ('agent', 'research', 'chat'): + set_session_mode(session, _effective_mode) + async def stream_with_save() -> AsyncGenerator[str, None]: # _effective_mode is read-only here; closure captures it from # the outer scope. (Was `nonlocal` but never reassigned.) 
@@ -666,7 +762,7 @@ def setup_chat_routes( web_sources = ctx.web_sources # Register active stream for partial-save safety net - _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": do_research, "mode": _effective_mode} + _active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode} if ctx.preprocessed.attachment_meta: yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n" @@ -690,7 +786,7 @@ def setup_chat_routes( yield f"data: {json.dumps({'type': 'memories_used', 'data': ctx.used_memories})}\n\n" # Run research as a background task (survives page refresh) - if do_research and _research_flags["do"]: + if effective_do_research: _r_ep, _r_model, _r_headers = _resolve_research_endpoint(sess) _auth_keys = list(_r_headers.keys()) if _r_headers else [] logger.info(f"Research endpoint resolved: model={_r_model}, endpoint={_r_ep}, auth_keys={_auth_keys}, sess_headers_keys={list(sess.headers.keys()) if isinstance(sess.headers, dict) else type(sess.headers)}") @@ -829,7 +925,7 @@ def setup_chat_routes( _fallback_candidates = [] # Send model name early so the frontend can show it during streaming - _model_suffix = "Research" if do_research else None + _model_suffix = "Research" if effective_do_research else None _model_info = {"type": "model_info", "model": sess.model} if _model_suffix: _model_info["suffix"] = _model_suffix @@ -839,6 +935,12 @@ def setup_chat_routes( if _is_image_generation_session(sess, owner=_user): from src.settings import get_setting + if tool_policy.blocks("generate_image"): + _blocked_msg = tool_policy.reason_for("generate_image") + yield f'data: {json.dumps({"delta": _blocked_msg})}\n\n' + yield "data: [DONE]\n\n" + _active_streams.pop(session, None) + return if not get_setting("image_gen_enabled", True): yield f'data: {json.dumps({"delta": "Image generation is disabled by the 
administrator."})}\n\n' yield "data: [DONE]\n\n" @@ -873,6 +975,8 @@ def setup_chat_routes( elif chat_mode == "chat": _chat_start = time.time() _answered_by = None # set if the selected model failed and a fallback answered + _requested_model = sess.model + _actual_model = None # ── Chat mode: call stream_llm directly, NO tools, NO document access ── try: _chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates @@ -905,10 +1009,18 @@ def setup_chat_routes( # Selected model failed; a fallback answered. # Forward the notice and remember the real model. _answered_by = data.get("answered_by") or _answered_by + _actual_model = _actual_model or _answered_by + data["selected_model"] = data.get("selected_model") or _requested_model yield chunk + elif data.get("type") == "model_actual": + _actual_model = data.get("model") or _actual_model + data["requested_model"] = _requested_model + yield f'data: {json.dumps(data)}\n\n' elif data.get("type") == "usage": last_metrics = data.get("data", {}) - last_metrics["model"] = _answered_by or sess.model + _reported_model = last_metrics.get("model") + last_metrics["requested_model"] = _requested_model + last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model if ctx.context_length and last_metrics.get("input_tokens"): pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0) last_metrics["context_percent"] = pct @@ -945,7 +1057,8 @@ def setup_chat_routes( "tokens_per_second": _tps, "context_percent": _ctx_pct, "context_length": ctx.context_length, - "model": sess.model, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, "usage_source": "estimated", } yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' @@ -957,7 +1070,7 @@ def setup_chat_routes( rag_sources=ctx.rag_sources, research_sources=research_sources, used_memories=ctx.used_memories, - do_research=do_research, + 
do_research=effective_do_research, incognito=incognito, ) if _saved_id: @@ -967,14 +1080,22 @@ def setup_chat_routes( last_metrics, ctx.uprefs, memory_manager, memory_vector, webhook_manager, incognito=incognito, compare_mode=compare_mode, character_name=ctx.preset.character_name, - owner=_user, + owner=_user, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) _stream_set(session, status="done") yield chunk except (asyncio.CancelledError, GeneratorExit): if full_response: logger.info("Client disconnected mid-stream (chat mode) for session %s, saving partial (%d chars)", session, len(full_response)) - _stopped_content, _stopped_md = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model}) + _stopped_content, _stopped_md = clean_thinking_for_save( + full_response, + { + "stopped": True, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, + }, + ) sess.add_message(ChatMessage("assistant", _stopped_content, metadata=_stopped_md)) if not incognito: session_manager.save_sessions() @@ -986,6 +1107,8 @@ def setup_chat_routes( _agent_rounds = 0 _agent_tool_calls = 0 _answered_by = None # set if the selected model failed and a fallback answered + _requested_model = sess.model + _actual_model = None try: from src.settings import get_setting from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS @@ -1012,9 +1135,12 @@ def setup_chat_routes( active_document=active_doc, session_id=session, disabled_tools=disabled_tools if disabled_tools else None, + tool_policy=tool_policy, owner=_user, fallbacks=_fallback_candidates, workspace=workspace or None, + plan_mode=plan_mode, + approved_plan=approved_plan or None, ): if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"): try: @@ -1035,6 +1161,8 @@ def setup_chat_routes( "doc_stream_open", "doc_stream_delta", "doc_update", "doc_suggestions", "ui_control", "rounds_exhausted", + "ask_user", + "plan_update", ): if 
data.get("type") == "agent_step": _agent_rounds = max(_agent_rounds, data.get("round", 1)) @@ -1047,10 +1175,18 @@ def setup_chat_routes( # model so metrics reflect it, not the masked # selected model. _answered_by = data.get("answered_by") or _answered_by + _actual_model = _actual_model or _answered_by + data["selected_model"] = data.get("selected_model") or _requested_model yield chunk + elif data.get("type") == "model_actual": + _actual_model = data.get("model") or _actual_model + data["requested_model"] = _requested_model + yield f'data: {json.dumps(data)}\n\n' elif data.get("type") == "metrics": last_metrics = data.get("data", {}) - last_metrics["model"] = _answered_by or sess.model + _reported_model = last_metrics.get("model") + last_metrics["requested_model"] = last_metrics.get("requested_model") or _requested_model + last_metrics["model"] = _reported_model or _actual_model or _answered_by or _requested_model yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n' except json.JSONDecodeError: yield chunk @@ -1078,6 +1214,7 @@ def setup_chat_routes( skills_manager=skills_manager, owner=_user, extract_skills=user_requested_agent, + allow_background_extraction=not tool_policy.block_all_tool_calls, ) _stream_set(session, status="done") yield chunk @@ -1091,7 +1228,14 @@ def setup_chat_routes( try: if full_response: logger.info("Client disconnected mid-stream for session %s, saving partial response (%d chars)", session, len(full_response)) - _stopped_content2, _stopped_md2 = clean_thinking_for_save(full_response, {"stopped": True, "model": sess.model}) + _stopped_content2, _stopped_md2 = clean_thinking_for_save( + full_response, + { + "stopped": True, + "model": _actual_model or _answered_by or _requested_model, + "requested_model": _requested_model, + }, + ) sess.add_message(ChatMessage("assistant", _stopped_content2, metadata=_stopped_md2)) if not incognito: session_manager.save_sessions() @@ -1110,11 +1254,30 @@ def setup_chat_routes( 
finally: _active_streams.pop(session, None) - # Run the stream as a DETACHED background task so it survives the client - # closing the tab / navigating away (true terminal-agent behavior). The - # SSE response just subscribes (replay buffered output + live); dropping - # the SSE only removes a subscriber — the run keeps going and saves the - # assistant message on completion regardless. Reconnect via /api/chat/resume. + # Compare panes are short-lived, single-shot generations whose sessions + # exist only to drive that one pane — there's nothing to "resume" and + # the user expects the pane's Stop button (which aborts the fetch, + # closing this SSE) to promptly cancel the upstream LLM call. Detaching + # them would keep burning upstream tokens/compute after the pane is + # stopped or the comparison is abandoned, and would surface a stale + # "still streaming" /resume target for a session nobody will revisit. + # + # So: stream them directly (no agent_runs wrapping). Starlette cancels + # the underlying async generator (raising CancelledError/GeneratorExit + # inside it) as soon as it notices the client disconnected — which the + # mode-specific except blocks above already handle by saving the + # partial response exactly once. This stops the upstream call promptly + # without waiting on the next streamed chunk. + # + # Normal chat/agent streams keep the DETACHED behavior below: they + # survive the client closing the tab / navigating away (true + # terminal-agent semantics). The SSE response just subscribes (replay + # buffered output + live); dropping the SSE only removes a subscriber — + # the run keeps going and saves the assistant message on completion + # regardless. Reconnect via /api/chat/resume. 
+ if compare_mode: + return StreamingResponse(_safe_stream(), media_type="text/event-stream") + agent_runs.start(session, _safe_stream()) return StreamingResponse(agent_runs.subscribe(session), media_type="text/event-stream") @@ -1185,45 +1348,16 @@ def setup_chat_routes( return [] _user = get_current_user(request) - query_term = q.strip() - db = SessionLocal() - try: - base_q = ( - db.query(DBChatMessage, DBSession.name) - .join(DBSession, DBChatMessage.session_id == DBSession.id) - .filter( - DBSession.archived == False, - DBChatMessage.content.ilike(f"%{query_term}%"), - DBChatMessage.role.in_(["user", "assistant"]), - ) + return [ + result.to_dict() + for result in search_session_messages( + q, + limit=limit, + owner=_user, + restrict_owner=_user is not None, + include_legacy_owner=False, ) - if _user: - base_q = base_q.filter(DBSession.owner == _user) - rows = base_q.order_by(DBChatMessage.timestamp.desc()).limit(limit).all() - - results = [] - for msg, session_name in rows: - content = msg.content or "" - lower_content = content.lower() - idx = lower_content.find(query_term.lower()) - if idx == -1: - snippet = content[:120] - else: - start = max(0, idx - 50) - end = min(len(content), idx + len(query_term) + 50) - snippet = ("..." if start > 0 else "") + content[start:end] + ("..." 
if end < len(content) else "") - - results.append({ - "session_id": msg.session_id, - "session_name": session_name or "Untitled", - "role": msg.role, - "content_snippet": snippet, - "timestamp": msg.timestamp.isoformat() if msg.timestamp else None, - }) - - return results - finally: - db.close() + ] # ------------------------------------------------------------------ # # POST /api/rewrite — lightweight rewrite of last AI message (no tools) diff --git a/routes/chatgpt_subscription_routes.py b/routes/chatgpt_subscription_routes.py new file mode 100644 index 000000000..9c695b371 --- /dev/null +++ b/routes/chatgpt_subscription_routes.py @@ -0,0 +1,170 @@ +"""ChatGPT Subscription device-flow setup routes.""" + +import json +import logging +import uuid +from typing import Dict, Optional + +from fastapi import HTTPException, Request + +from core.database import ModelEndpoint, ProviderAuthSession, SessionLocal, utcnow_naive +from routes.device_flow import ( + DeviceFlowPoll, + DeviceFlowStart, + PendingDeviceFlowStore, + create_device_flow_router, +) +from src.auth_helpers import get_current_user +from src import chatgpt_subscription + +logger = logging.getLogger(__name__) + +_DEVICE_FLOW_STORE = PendingDeviceFlowStore() + + +def _provision_endpoint(tokens: Dict, owner: Optional[str]) -> Dict: + access_token = tokens.get("access_token") + refresh_token = tokens.get("refresh_token") + if not access_token or not refresh_token: + raise ValueError("ChatGPT token response was missing access_token or refresh_token") + + base = chatgpt_subscription.DEFAULT_CHATGPT_SUBSCRIPTION_BASE_URL + models = chatgpt_subscription.fetch_available_models(access_token) + if not models: + raise ValueError("ChatGPT Subscription connected, but no usable Codex models were discovered for this account.") + db = SessionLocal() + try: + auth = ( + db.query(ProviderAuthSession) + .filter( + ProviderAuthSession.provider == chatgpt_subscription.CHATGPT_SUBSCRIPTION_PROVIDER, + ProviderAuthSession.owner == 
owner, + ) + .first() + ) + if auth is None: + auth = ProviderAuthSession( + id=str(uuid.uuid4())[:8], + provider=chatgpt_subscription.CHATGPT_SUBSCRIPTION_PROVIDER, + owner=owner, + label="ChatGPT Subscription", + base_url=base, + auth_mode="chatgpt", + ) + db.add(auth) + auth.base_url = base + auth.access_token = access_token + auth.refresh_token = refresh_token + auth.last_refresh = utcnow_naive() + auth.auth_mode = "chatgpt" + + ep = ( + db.query(ModelEndpoint) + .filter( + ModelEndpoint.base_url == base, + ModelEndpoint.provider_auth_id == auth.id, + ModelEndpoint.owner == owner, + ) + .first() + ) + if ep is None: + ep = ModelEndpoint( + id=str(uuid.uuid4())[:8], + name="ChatGPT Subscription", + base_url=base, + model_type="llm", + endpoint_kind="api", + owner=owner, + ) + db.add(ep) + ep.name = "ChatGPT Subscription" + ep.base_url = base + ep.api_key = None + ep.provider_auth_id = auth.id + ep.is_enabled = True + ep.supports_tools = False + ep.model_type = "llm" + ep.endpoint_kind = "api" + ep.model_refresh_mode = "manual" + ep.cached_models = json.dumps(models) + db.commit() + result = { + "id": ep.id, + "name": ep.name, + "base_url": ep.base_url, + "models": models, + } + finally: + db.close() + + try: + from routes.model_routes import _invalidate_models_cache + + _invalidate_models_cache() + except Exception: + pass + return result + + +def _start_device_flow(request: Request, _form) -> DeviceFlowStart: + try: + data = chatgpt_subscription.request_device_code() + except Exception as exc: + raise chatgpt_subscription.to_http_exception(exc) + + device_auth_id = data.get("device_auth_id") + user_code = data.get("user_code") + if not device_auth_id or not user_code: + raise HTTPException(502, "ChatGPT did not return a complete device code") + verification_uri = data.get("verification_uri") or f"{chatgpt_subscription.CHATGPT_OAUTH_ISSUER}/codex/device" + return DeviceFlowStart( + pending={ + "device_auth_id": device_auth_id, + "user_code": user_code, + 
"owner": get_current_user(request) or None, + }, + response={ + "user_code": user_code, + "verification_uri": verification_uri, + }, + interval=int(data.get("interval") or 5), + expires_in=int(data.get("expires_in") or 900), + ) + + +def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll: + try: + data = chatgpt_subscription.poll_device_auth(pending["device_auth_id"], pending["user_code"]) + except Exception as exc: + logger.debug("ChatGPT device poll failed: %s", exc) + return DeviceFlowPoll.pending(str(exc)) + + authorization_code = data.get("authorization_code") + code_verifier = data.get("code_verifier") + if authorization_code and code_verifier: + try: + tokens = chatgpt_subscription.exchange_authorization_code(authorization_code, code_verifier) + result = _provision_endpoint(tokens, pending["owner"]) + except Exception as exc: + logger.exception("ChatGPT Subscription endpoint provisioning failed") + raise chatgpt_subscription.to_http_exception(exc) + return DeviceFlowPoll.authorized(result) + + err = data.get("error") or data.get("status") + if err in ("authorization_pending", "pending", None): + return DeviceFlowPoll.pending() + if err == "slow_down": + return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None) + if err in ("expired_token", "access_denied", "denied"): + return DeviceFlowPoll.failed(err) + return DeviceFlowPoll.pending(err or "unknown") + + +def setup_chatgpt_subscription_routes(): + return create_device_flow_router( + prefix="/api/chatgpt-subscription", + tags=["chatgpt-subscription"], + store=_DEVICE_FLOW_STORE, + start_flow=_start_device_flow, + poll_flow=_poll_device_flow, + ) diff --git a/routes/codex_routes.py b/routes/codex_routes.py index 9898daed2..1afac02b9 100644 --- a/routes/codex_routes.py +++ b/routes/codex_routes.py @@ -15,8 +15,9 @@ from typing import Any from fastapi import APIRouter, BackgroundTasks, Body, HTTPException, Request from fastapi.responses import StreamingResponse -from 
src.auth_helpers import require_user +from src.auth_helpers import require_authenticated_request, require_user from src.tool_implementations import do_manage_notes +from src.constants import COOKBOOK_STATE_FILE COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"} @@ -41,7 +42,9 @@ async def _as_owner(request: Request, owner: str, fn, *args, **kwargs): the scope-gated owner (not the "api" pseudo-user the bearer middleware sets). Restores the original value when done. Works for sync and async handlers.""" orig = getattr(request.state, "current_user", None) + orig_api_token = getattr(request.state, "api_token", None) request.state.current_user = owner + request.state.api_token = False try: result = fn(*args, **kwargs) if asyncio.iscoroutine(result): @@ -49,6 +52,13 @@ async def _as_owner(request: Request, owner: str, fn, *args, **kwargs): return result finally: request.state.current_user = orig + if orig_api_token is None: + try: + delattr(request.state, "api_token") + except AttributeError: + pass + else: + request.state.api_token = orig_api_token def _scope_owner(request: Request, allowed: set[str]) -> str: @@ -146,7 +156,7 @@ def setup_codex_routes( @router.get("/plugin.zip") def plugin_zip(request: Request): - require_user(request) + require_authenticated_request(request) root = Path(__file__).resolve().parent.parent / "integrations" / "codex" if not root.exists(): raise HTTPException(404, "Codex plugin bundle not found") @@ -415,8 +425,8 @@ def setup_codex_routes( def _read_cookbook_state() -> dict: from pathlib import Path as _Path - import os as _os, json as _json - p = _Path(_os.environ.get("DATA_DIR", "data")) / "cookbook_state.json" + import json as _json + p = _Path(COOKBOOK_STATE_FILE) if not p.exists(): return {} try: @@ -724,7 +734,7 @@ def setup_codex_routes( import time as _t, json as _json from core.atomic_io import atomic_write_json from pathlib import Path as _Path - cookbook_state_path = _Path("/app/data/cookbook_state.json") + 
cookbook_state_path = _Path(COOKBOOK_STATE_FILE) try: state = _json.loads(cookbook_state_path.read_text(encoding="utf-8")) except Exception: @@ -762,7 +772,7 @@ def setup_claude_routes() -> APIRouter: @router.get("/plugin.zip") def plugin_zip(request: Request): - require_user(request) + require_authenticated_request(request) # Only ship the skills/ subtree so extracting at ~/.claude/ doesn't dump # README.md or other bundle metadata into the user's claude config dir. skills_root = Path(__file__).resolve().parent.parent / "integrations" / "claude" / "skills" diff --git a/routes/compare_routes.py b/routes/compare_routes.py index 35cd21289..ad42f1a89 100644 --- a/routes/compare_routes.py +++ b/routes/compare_routes.py @@ -12,6 +12,7 @@ import logging from core.database import Comparison, SessionLocal from core.session_manager import SessionManager from src.auth_helpers import get_current_user +from routes.session_routes import _reject_raw_endpoint_url_for_non_admin logger = logging.getLogger(__name__) @@ -38,6 +39,24 @@ def _owned_endpoint_by_url(db, base_url, owner): return owner_filter(q, ModelEndpoint, owner).first() +def _owned_endpoint_by_id(db, endpoint_id, owner): + """ModelEndpoint whose id == `endpoint_id` and is VISIBLE to `owner` (their + own rows + legacy null-owner "shared" rows); None otherwise. + + Preferred over _owned_endpoint_by_url for credential resolution: two visible + endpoints can share the same base_url but hold DIFFERENT api_keys (e.g. two + accounts on the same provider). A base_url-only match returns whichever row + sorts first, so it can copy the WRONG owner-scoped key into the [CMP] session. + An id pins the exact registered endpoint, so /api/compare/start prefers it and + only falls back to URL matching for legacy / admin raw-URL callers. Owner + scoping is identical to _owned_endpoint_by_url (a null/empty owner is a no-op). 
+ """ + from core.database import ModelEndpoint + from src.auth_helpers import owner_filter + q = db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id) + return owner_filter(q, ModelEndpoint, owner).first() + + class RecordVoteRequest(BaseModel): prompt: str models: List[str] @@ -54,8 +73,10 @@ def setup_compare_routes(session_manager: SessionManager): prompt: str = Form(...), model_a: str = Form(...), model_b: str = Form(...), - endpoint_a: str = Form(...), - endpoint_b: str = Form(...), + endpoint_a: str = Form(""), + endpoint_b: str = Form(""), + endpoint_a_id: str = Form(""), + endpoint_b_id: str = Form(""), is_blind: str = Form("true"), ): """Create two ephemeral sessions and a comparison record. @@ -63,10 +84,10 @@ def setup_compare_routes(session_manager: SessionManager): Returns the comparison ID and the two session IDs so the client can fire two independent SSE streams to /api/chat_stream. """ + user = getattr(request.state, 'current_user', None) comp_id = str(uuid.uuid4()) sid_a = str(uuid.uuid4()) sid_b = str(uuid.uuid4()) - user = getattr(request.state, 'current_user', None) # Blind mapping: randomly assign left/right blind = str(is_blind).lower() == "true" @@ -87,31 +108,94 @@ def setup_compare_routes(session_manager: SessionManager): # de-anonymizing the comparison before the user votes (issue #1285). slot_name = {session_left: "Model A", session_right: "Model B"} - # Create ephemeral sessions (prefixed [CMP]) - for sid, model, endpoint in [(sid_a, model_a, endpoint_a), (sid_b, model_b, endpoint_b)]: + # SECURITY: resolve and validate BOTH endpoints before creating any + # session. Compare copies a registered endpoint's Authorization header + # into the [CMP] session, so validating one endpoint while creating its + # session, then rejecting the other, would leave a partial compare + # session behind with that header attached. 
Doing all the owner-scope + # resolution + raw-URL rejection up front means a 403 on either endpoint + # aborts the whole request with nothing created and no header copied. + from src.endpoint_resolver import build_chat_url, build_headers, normalize_base + resolved = [] + db = SessionLocal() + try: + for sid, model, endpoint, endpoint_id in [ + (sid_a, model_a, endpoint_a, endpoint_a_id), + (sid_b, model_b, endpoint_b, endpoint_b_id), + ]: + # Prefer an explicit endpoint id: it pins the EXACT registered + # endpoint (and its api_key), even when two endpoints visible to + # the caller share a base_url with different keys — a URL-only + # match would copy whichever row sorts first, i.e. possibly the + # wrong key. Fall back to URL resolution only for legacy / admin + # raw-URL callers that don't send an id. + eid = endpoint_id.strip() if isinstance(endpoint_id, str) else "" + if eid: + ep = _owned_endpoint_by_id(db, eid, user) + if ep is None: + # An id the caller can't see (wrong owner / deleted) must + # NOT silently fall back to a same-URL row with a different + # key — that's exactly the mix-up ids exist to prevent. + raise HTTPException(404, "Model endpoint not found") + # The id already resolved the endpoint; ignore any raw URL the + # caller also sent and dial the stored config instead. + endpoint = ep.base_url + elif not endpoint: + raise HTTPException( + 422, "endpoint_a/endpoint_b or endpoint_a_id/endpoint_b_id is required" + ) + else: + # Resolve the supplied URL to a ModelEndpoint the caller owns + # (their own rows + legacy null-owner shared rows), scoped so a + # comparison can't borrow another user's private endpoint key. + base = normalize_base(endpoint) + ep = _owned_endpoint_by_url(db, base, user) + # Reject *unregistered* raw URLs for signed-in non-admins; a + # matched registered endpoint supplies an id so the caller can + # still compare endpoints they own. 
Blanket-rejecting here (the + # earlier `endpoint_id=None` call) locked non-admins out of + # compare entirely, since compare resolves endpoints by URL with + # no endpoint_id. Mirrors the gallery inpaint/harmonize checks. + # Raised here (phase 1), before any session exists. + _reject_raw_endpoint_url_for_non_admin( + request, user, str(ep.id) if ep is not None else None, endpoint + ) + # Bind the [CMP] session to the RESOLVED endpoint, not the raw + # caller-supplied string. When the URL matches a registered + # endpoint visible to the caller, use that row's own normalized + # base URL (the same value owner scoping + endpoint validation + # already vetted) so the session dials exactly where the stored + # config points. The raw `endpoint` only survives for callers + # allowed to pass one — admins / single-user mode, where + # `_reject_raw_endpoint_url_for_non_admin` is a no-op and `ep` + # is None. Mirrors the registered-endpoint path in session_routes. + session_endpoint_url = ( + build_chat_url(normalize_base(ep.base_url)) if ep is not None else endpoint + ) + # Headers come only from a matched endpoint's key; None when + # `ep` is None (raw admin URL or no match), so a comparison can + # never inherit another user's key/headers. + headers = build_headers(ep.api_key, ep.base_url) if (ep and ep.api_key) else None + resolved.append((sid, model, session_endpoint_url, headers)) + finally: + db.close() + + # Both endpoints validated — only now create the ephemeral [CMP] + # sessions and copy any resolved headers. 
+ for sid, model, session_endpoint_url, headers in resolved: name = f"[CMP] {slot_name[sid]}" if blind else f"[CMP] {model.split('/')[-1]}" session_manager.create_session( session_id=sid, name=name, - endpoint_url=endpoint, + endpoint_url=session_endpoint_url, model=model, rag=False, owner=user, ) - # Copy API key from endpoint config - db = SessionLocal() - try: - from src.endpoint_resolver import build_headers, normalize_base - # Find matching endpoint by URL, scoped to the caller so a - # comparison can't borrow another user's private endpoint key. - base = normalize_base(endpoint) - ep = _owned_endpoint_by_url(db, base, user) - if ep and ep.api_key: - s = session_manager.sessions.get(sid) - if s: - s.headers = build_headers(ep.api_key, ep.base_url) - finally: - db.close() + if headers: + s = session_manager.sessions.get(sid) + if s: + s.headers = headers # Store comparison record db = SessionLocal() @@ -121,8 +205,12 @@ def setup_compare_routes(session_manager: SessionManager): prompt=prompt, model_a=model_a, model_b=model_b, - endpoint_a=endpoint_a, - endpoint_b=endpoint_b, + # Record the URL the session actually dials. For URL callers this + # is their raw input; for id-only callers (empty endpoint_a/_b) + # fall back to the resolved endpoint URL so the column stays + # meaningful and non-null. resolved is in [a, b] order. 
+ endpoint_a=endpoint_a or resolved[0][2], + endpoint_b=endpoint_b or resolved[1][2], is_blind=blind, blind_mapping=json.dumps(mapping), owner=user, diff --git a/routes/contacts_routes.py b/routes/contacts_routes.py index 409184fa1..e4e8ce759 100644 --- a/routes/contacts_routes.py +++ b/routes/contacts_routes.py @@ -11,20 +11,24 @@ import uuid import json import csv import io +import os import httpx from pathlib import Path from datetime import datetime -from fastapi import APIRouter, Query, Depends, Response +from urllib.parse import urljoin, urlparse, urlunparse + +from fastapi import APIRouter, Query, Depends, Response, HTTPException from typing import List, Dict, Optional -from src.auth_helpers import require_user from core.middleware import require_admin +from src.url_safety import check_outbound_url logger = logging.getLogger(__name__) -DATA_DIR = Path(__file__).resolve().parent.parent / "data" -SETTINGS_FILE = DATA_DIR / "settings.json" -LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json" +from src.constants import DATA_DIR as _DATA_DIR, SETTINGS_FILE as _SETTINGS_FILE, CONTACTS_FILE as _CONTACTS_FILE +DATA_DIR = Path(_DATA_DIR) +SETTINGS_FILE = Path(_SETTINGS_FILE) +LOCAL_CONTACTS_FILE = Path(_CONTACTS_FILE) def _load_settings(): @@ -53,6 +57,21 @@ def _carddav_configured(cfg: Optional[Dict] = None) -> bool: return bool((cfg.get("url") or "").strip()) +def _validate_carddav_url(url: str) -> str: + cleaned = (url if isinstance(url, str) else "").strip().rstrip("/") + ok, reason = check_outbound_url( + cleaned, + block_private=os.getenv("CARDDAV_BLOCK_PRIVATE_IPS", "false").lower() == "true", + ) + if not ok: + raise ValueError(f"Rejected CardDAV URL: {reason}") + return cleaned + + +def _carddav_base_url(cfg: Dict) -> str: + return _validate_carddav_url(cfg.get("url") or "") + + def _normalize_contact(contact: Dict) -> Dict: emails = [] for e in contact.get("emails") or ([] if not contact.get("email") else [contact.get("email")]): @@ -219,14 +238,18 @@ 
_contact_cache = {"contacts": [], "fetched_at": None} def _abs_url(href: str) -> str: """Combine a multistatus (an absolute path like /user/contacts/x.vcf) with the configured CardDAV server origin so we - get a fully-qualified URL to PUT/DELETE. If href is already absolute - (http...), return it as-is.""" - from urllib.parse import urlparse, urlunparse - if href.startswith("http://") or href.startswith("https://"): - return href + get a fully-qualified URL to PUT/DELETE. Absolute hrefs are accepted only + for the configured origin; a cross-origin href is treated as a path on the + configured server so a malicious CardDAV response cannot redirect later + writes/deletes to cloud metadata or another host.""" cfg = _get_carddav_config() - p = urlparse(cfg["url"]) - return urlunparse((p.scheme, p.netloc, href, "", "", "")) + base = _carddav_base_url(cfg) + base_p = urlparse(base) + joined = urljoin(base.rstrip("/") + "/", href or "") + joined_p = urlparse(joined) + if (joined_p.scheme, joined_p.netloc) != (base_p.scheme, base_p.netloc): + joined = urlunparse((base_p.scheme, base_p.netloc, joined_p.path or "/", "", joined_p.query, "")) + return _validate_carddav_url(joined) # CardDAV REPORT body — pull every card's etag + raw vCard in ONE request, @@ -297,6 +320,7 @@ def _fetch_contacts(force=False): return contacts try: + cfg["url"] = _carddav_base_url(cfg) auth = None if cfg["username"]: auth = (cfg["username"], cfg["password"]) @@ -353,8 +377,8 @@ def _create_contact(name: str, email: str) -> bool: contact_uid = str(uuid.uuid4()) vcard = _build_vcard(name, email, contact_uid) - url = cfg["url"].rstrip("/") + "/" + contact_uid + ".vcf" try: + url = _carddav_base_url(cfg) + "/" + contact_uid + ".vcf" auth = None if cfg["username"]: auth = (cfg["username"], cfg["password"]) @@ -382,7 +406,7 @@ def _vcard_url(uid: str) -> str: escape the collection and target an arbitrary CardDAV resource.""" from urllib.parse import quote cfg = _get_carddav_config() - return 
cfg["url"].rstrip("/") + "/" + quote(uid, safe="") + ".vcf" + return _carddav_base_url(cfg) + "/" + quote(uid, safe="") + ".vcf" def _import_vcards(text: str) -> Dict: @@ -413,6 +437,11 @@ def _import_vcards(text: str) -> Dict: if imported: _save_local_contacts(contacts) return {"imported": imported, "failed": 0, "total": len(parsed)} + try: + base_url = _carddav_base_url(cfg) + except ValueError as e: + logger.warning("CardDAV import URL rejected: %s", e) + return {"imported": 0, "failed": 0, "total": 0, "error": str(e)} auth = (cfg["username"], cfg["password"]) if cfg["username"] else None # Split into individual cards. re.split drops the BEGIN line, so we # re-add it. Normalize CRLF. @@ -441,7 +470,7 @@ def _import_vcards(text: str) -> Dict: elif not re.search(r"^VERSION:", block, re.MULTILINE): block = block.replace("BEGIN:VCARD", "BEGIN:VCARD\nVERSION:4.0", 1) vcard = block.replace("\n", "\r\n") + "\r\n" - url = cfg["url"].rstrip("/") + "/" + quote(uid, safe="") + ".vcf" + url = base_url + "/" + quote(uid, safe="") + ".vcf" try: r = httpx.put( url, data=vcard.encode("utf-8"), @@ -601,8 +630,8 @@ def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) - vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones) # Use the real resource href (handles externally-created contacts whose # filename != UID); falls back to the .vcf guess. 
- url = _resolve_resource_url(uid) try: + url = _resolve_resource_url(uid) auth = (cfg["username"], cfg["password"]) if cfg["username"] else None r = httpx.put( url, @@ -630,8 +659,8 @@ def _delete_contact(uid: str) -> bool: _save_local_contacts(remaining) return True - url = _resolve_resource_url(uid) try: + url = _resolve_resource_url(uid) auth = (cfg["username"], cfg["password"]) if cfg["username"] else None r = httpx.delete(url, auth=auth, timeout=10) if r.status_code in (200, 204): @@ -747,7 +776,13 @@ def setup_contacts_routes(): settings = _load_settings() for key in ("carddav_url", "carddav_username", "carddav_password"): if key in data: - settings[key] = data[key] + if key == "carddav_url" and str(data[key] or "").strip(): + try: + settings[key] = _validate_carddav_url(data[key]) + except ValueError as e: + raise HTTPException(400, str(e)) + else: + settings[key] = data[key] _save_settings(settings) # Force re-fetch _contact_cache["fetched_at"] = None diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index 454c67b42..39a18f715 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -11,6 +11,8 @@ import shlex from fastapi import HTTPException from pydantic import BaseModel +from core.platform_compat import _ssh_exec_argv + logger = logging.getLogger(__name__) @@ -195,6 +197,20 @@ def _pip_install_attempt(pip_cmd: str) -> str: ) +def _pip_command(python_cmd: str) -> str: + """Return a pip command for either a pip executable or a Python executable.""" + cmd = python_cmd.strip() + if " -m pip" in cmd or cmd in {"pip", "pip3"}: + return python_cmd + if cmd in {"python", "python3", "python.exe"} or cmd.endswith(("/python", "/python3", "\\python.exe")): + return f"{python_cmd} -m pip" + return python_cmd + + +def _pip_break_system_packages_check(pip_cmd: str) -> str: + return f"{pip_cmd} install --help 2>/dev/null | grep -q -- --break-system-packages" + + def _pip_install_fallback_chain(package: str, *, python_cmd: str = 
"python3 -m pip", upgrade: bool = False) -> str: """Build a bash pip install fallback chain that surfaces errors. @@ -206,33 +222,44 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p exit code is preserved (no ``| tail`` masking) and the last 5 lines of pip output appear in the Cookbook log on failure. """ + from core.platform_compat import IS_WINDOWS upgrade_flag = " -U" if upgrade else "" # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]`` # contains brackets that bash would treat as a glob, so it must be quoted # before being embedded in the install command. Plain names (e.g. # ``huggingface_hub``) are returned unchanged by ``shlex.quote``. pkg = shlex.quote(package) - base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}") - user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}") + # llama-cpp-python source builds are brittle on older distro pip/packaging + # stacks (common on WSL images). Prefer the prebuilt wheel index whenever + # this package is requested so dependency-install tasks are reliable. + if "llama-cpp-python" in package: + pkg += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu" + + pip_cmd = _pip_command(python_cmd) + base = _pip_install_attempt(f"{pip_cmd} install -q{upgrade_flag} {pkg}") + user = _pip_install_attempt(f"{pip_cmd} install --user -q{upgrade_flag} {pkg}") + user_break_system = _pip_install_attempt(f"{pip_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}") + user_fallback = f"( {user} || {{ {_pip_break_system_packages_check(pip_cmd)} && {user_break_system}; }} )" # Derive the python executable for the venv detection check. # Must use the same interpreter that pip belongs to; hardcoding # python3 breaks when pip lives in a venv that only has "python". 
- if " -m pip" in python_cmd: - python_exe = python_cmd.replace(" -m pip", "") - elif python_cmd.strip() == "pip": + if " -m pip" in pip_cmd: + python_exe = pip_cmd.replace(" -m pip", "") + elif pip_cmd.strip() == "pip": python_exe = "python" - elif python_cmd.strip() == "pip3": + elif pip_cmd.strip() == "pip3": python_exe = "python3" else: python_exe = "python3" venv_check = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"' - # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv → `&&` tries - # --user. When IN a venv `! venv_check` fails → `&&` skips --user and the + # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv -> `&&` tries + # --user. When IN a venv `! venv_check` fails -> `&&` skips --user and the # group exits non-zero, propagating the base-install failure instead of # masking it as success (the `|| { venv_check || … }` shape from #903 # swallowed the exit code because venv_check's exit-0 became the group's - # result). - return f"{base} || {{ ! {venv_check} && {user}; }}" + # result). `--break-system-packages` is only attempted when the active pip + # supports it; older pip versions abort with "no such option" otherwise. + return f"{base} || {{ ! 
{venv_check} && {user_fallback}; }}" def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str: @@ -263,6 +290,55 @@ def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> return shlex.join(stripped) +def _pip_install_command_without_break_system_packages(cmd: str) -> str: + try: + parts = shlex.split(cmd) + except ValueError: + return cmd + stripped = [part for part in parts if part != "--break-system-packages"] + return shlex.join(stripped) + + +def _pip_install_help_check_from_cmd(cmd: str) -> str | None: + try: + parts = shlex.split(cmd) + except ValueError: + return None + try: + install_index = parts.index("install") + except ValueError: + return None + if install_index <= 0: + return None + pip_prefix = parts[:install_index] + return f"{shlex.join(pip_prefix + ['install', '--help'])} 2>/dev/null | grep -q -- --break-system-packages" + + +def _append_pip_install_runner_lines(runner_lines: list[str], cmd: str) -> None: + """Append a pip install command, guarding --break-system-packages support. + + The Dependencies UI may submit ``python3 -m pip install --user + --break-system-packages ...`` for non-venv installs. That flag is useful on + PEP-668-locked distros, but older pip (including Ubuntu 22.04's apt pip in + the NVIDIA CUDA base image) aborts with "no such option". Branch at runner + time so stale browser JS and remote targets are handled by the server too. 
+ """ + if "--break-system-packages" not in (cmd or ""): + runner_lines.append(cmd) + return + help_check = _pip_install_help_check_from_cmd(cmd) + without_break = _pip_install_command_without_break_system_packages(cmd) + if not help_check or without_break == cmd: + runner_lines.append(cmd) + return + runner_lines.append(f"if {help_check}; then") + runner_lines.append(f" {cmd}") + runner_lines.append("else") + runner_lines.append(' echo "[odysseus] pip does not support --break-system-packages; installing without it."') + runner_lines.append(f" {without_break}") + runner_lines.append("fi") + + def _user_shell_path_bootstrap() -> list[str]: return [ 'ODYSSEUS_USER_SHELL="${SHELL:-}"', @@ -271,11 +347,14 @@ def _user_shell_path_bootstrap() -> list[str]: ' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi', 'fi', 'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }', + 'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }', ] -def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: - """Build the standalone Python scanner used by /api/model/cached.""" +def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache: str | None = None) -> str: + """Build the standalone Python scanner used by /api/model/cached. + Allows for an additional HuggingFace cache path to be scanned (i.e. Windows HF cache for local WSL envs.) 
+ """ lines = [ "import json, os, re, shutil, subprocess, urllib.request", "models = []", @@ -338,6 +417,15 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " if f.is_file(): nf += 1; sz += f.stat().st_size", " if f.name.endswith('.incomplete'): ic = True", " snap = os.path.join(cache, d, 'snapshots')", + " # Windows HF cache stores files directly in snapshots/; blobs/ may be empty.", + " # Fallback: scan snapshots for real files when blobs yielded nothing.", + " if sz == 0 and os.path.isdir(snap):", + " for sd in os.listdir(snap):", + " sf = os.path.join(snap, sd)", + " if not os.path.isdir(sf): continue", + " for f in os.scandir(sf):", + " if f.is_file(): nf += 1; sz += f.stat().st_size", + " if f.name.endswith('.incomplete'): ic = True", " is_diffusion = False; gguf_files = []", " if os.path.isdir(snap):", " for sd in os.listdir(snap):", @@ -346,6 +434,21 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True", " for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)", " models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})", + "def hf_cache_paths():", + " candidates = []", + " def add(p):", + " if not p: return", + " p = os.path.expanduser(p)", + " if p not in candidates: candidates.append(p)", + " add(os.environ.get('HUGGINGFACE_HUB_CACHE'))", + " hf_home = os.environ.get('HF_HOME')", + " if hf_home: add(os.path.join(hf_home, 'hub'))", + " add('~/.cache/huggingface/hub')", + " # Docker images mount ./data/huggingface at /app/.cache/huggingface.", + " # When HOME is /root, expanduser() misses that persisted cache.", + " add('/app/.cache/huggingface/hub')", + f" add({add_hf_cache!r})" if add_hf_cache else "", + " return candidates", "def scan_dir(p):", " if not os.path.isdir(p) or 
not safe_path(p): return", " for d in sorted(os.listdir(p)):", @@ -409,7 +512,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " seen.add(name)", " models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})", " return", - "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))", + "for _hf_cache in hf_cache_paths(): scan_hf(_hf_cache)", "scan_ollama()", "scan_ollama_api()", ] @@ -525,6 +628,7 @@ def _validate_serve_cmd(v: str | None) -> str | None: # Backticks and raw newlines are never legitimate here. if any(c in v for c in ("`", "\n", "\r")): raise HTTPException(400, "Invalid characters in cmd") + # Known GGUF launcher prelude → validate the serve invocation(s) it guards. m = _GGUF_PRELUDE_RE.match(v) if m: @@ -533,9 +637,19 @@ def _validate_serve_cmd(v: str | None) -> str | None: for part in rest.split("||"): _check_serve_binary(part.strip()) return v + # Otherwise: a single invocation — no shell metacharacters allowed. + # Temporarily replace safe $(printf %s ...) expressions with a placeholder + # to avoid triggering the metacharacter/command-injection checks. + cleaned_v = v + printf_matches = list(re.finditer(r"\$\(\s*printf\s+%s\s+([^\n()]*?)\)", v)) + for match in printf_matches: + inner = match.group(1) + if not any(c in inner for c in (";", "&&", "||", "$(", "`")): + cleaned_v = cleaned_v.replace(match.group(0), "/placeholder/safe/path.gguf") + # (`$(` was the original intent; bare `$` is fine for shell-safe paths.) 
- if any(c in v for c in (";", "&&", "||", "$(")): + if any(c in cleaned_v for c in (";", "&&", "||", "$(")): raise HTTPException(400, "Invalid characters in cmd") _check_serve_binary(v) return v @@ -559,6 +673,21 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op runner_lines.append('fi') +def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None: + """Append Linux vLLM readiness lines that identify the runtime being used.""" + # Keep the user install bin visible for Odysseus-managed `pip install --user` + # installs, but then report the actual CLI path so external runtimes are clear. + runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') + runner_lines.append('ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"') + runner_lines.append('if [ -z "$ODYSSEUS_VLLM_BIN" ]; then') + runner_lines.append(' echo "ERROR: vLLM is not installed."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('else') + runner_lines.append(' echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"') + runner_lines.append(' ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"') + runner_lines.append(' if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi') + runner_lines.append('fi') + def _append_serve_exit_code_lines( runner_lines: list[str], *, @@ -804,3 +933,172 @@ def _ssh_ps(host, script_path, port=None): # Windows session dir — stored in user's temp on the remote WIN_SESSION_DIR = "$env:TEMP\\\\odysseus-sessions" + + +def _diagnose_serve_output(text: str) -> dict | None: + """Server-side mirror of the Cookbook UI's common serve diagnoses. + + The browser uses cookbook-diagnosis.js for clickable fixes. This gives + the agent/tool path the same structured signal so it can retry with an + adjusted command instead of guessing from raw tmux output. 
+ """ + if not text: + return None + tail = text[-6000:] + patterns = [ + ( + r"No available memory for the cache blocks|Available KV cache memory:.*-", + "No GPU memory left for KV cache after loading model.", + [ + {"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"}, + {"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"}, + ], + ), + ( + r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization", + "GPU ran out of memory during startup or warmup.", + [ + {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, + {"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"}, + {"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"}, + ], + ), + ( + r"not divisib|must be divisible|attention heads.*divisible", + "Tensor parallel size is incompatible with the model.", + [ + {"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"}, + {"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"}, + ], + ), + ( + r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context", + "Context length is too large for available GPU memory.", + [ + {"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"}, + {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, + ], + ), + ( + r"enable-auto-tool-choice requires --tool-call-parser", + "Auto tool choice requires an explicit tool call parser.", + [{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}], + ), + ( + r"Please pass.*trust.remote.code=True|contains custom code which must be 
executed to correctly load|does not recognize this architecture|model type.*but Transformers does not", + "Model requires custom code or newer model support.", + [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], + ), + ( + r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2", + "vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.", + [ + { + "label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint", + "op": "manual", + } + ], + ), + ( + r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", + "vLLM/Transformers kernel package mismatch.", + [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}], + ), + ( + r"Address already in use|bind.*address.*in use", + "Port is already in use.", + [{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}], + ), + ( + r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid", + "No GPUs are visible to the serve process.", + [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}], + ), + ( + r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available", + "vLLM could not find a supported GPU (CUDA or ROCm). 
" + "This machine may have integrated or unsupported graphics only.", + [ + {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"}, + ], + ), + ( + r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed", + "vLLM is not installed or not in PATH on this server.", + [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}], + ), + ( + r"sglang.*command not found|No module named sglang|SGLang is not installed", + "SGLang is not installed or not in PATH on this server.", + [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}], + ), + ( + r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found", + "llama.cpp / llama-cpp-python dependencies are missing.", + [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}], + ), + ( + r"No GGUF found on this host|no \.gguf file|No GGUF file found", + "No GGUF file found for this model on this host. 
The llama.cpp backend needs a .gguf file.", + [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}], + ), + ( + r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers", + "Diffusion serving requires PyTorch and diffusers.", + [{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}], + ), + ( + r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review", + "Model access is gated or unauthorized.", + [{"label": "set HF token and request model access on HuggingFace", "op": "manual"}], + ), + ] + for pattern, message, suggestions in patterns: + if re.search(pattern, tail, re.I): + return {"message": message, "suggestions": suggestions} + if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search( + r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I + ): + return { + "message": "Python traceback detected during serve startup.", + "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}], + } + return None + + +async def run_ssh_command_async( + remote: str, + ssh_port: str | None, + remote_cmd: str, + *, + timeout: float, + connect_timeout: int | None = None, + strict_host_key_checking: bool | None = None, + stdin_data: bytes | None = None, +) -> tuple[int, bytes, bytes]: + """Run an ssh command with centralized timeout and stderr/stdout capture. + Async version of core.platform_compat.run_ssh_command_sync. 
+ """ + import asyncio + proc = await asyncio.create_subprocess_exec( + *_ssh_exec_argv( + remote, + ssh_port, + remote_cmd=remote_cmd, + connect_timeout=connect_timeout, + strict_host_key_checking=strict_host_key_checking, + ), + stdin=asyncio.subprocess.PIPE if stdin_data is not None else None, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + try: + stdout, stderr = await asyncio.wait_for( + proc.communicate(input=stdin_data), timeout=timeout + ) + except asyncio.TimeoutError: + proc.kill() + await proc.communicate() + raise + return proc.returncode or 0, stdout, stderr diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index bf2365b9e..7a1ee85c6 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -15,19 +15,26 @@ from pathlib import Path from fastapi import APIRouter, HTTPException, Request, Depends from src.auth_helpers import require_user +from src.constants import COOKBOOK_STATE_FILE from pydantic import BaseModel from core.middleware import require_admin from core.platform_compat import ( IS_WINDOWS, + SSH_PATH_OVERRIDE, + NVIDIA_PATH_CANDIDATES, detached_popen_kwargs, find_bash, + git_bash_path, kill_process_tree, pid_alive, safe_chmod, which_tool, + translate_path, + get_wsl_windows_user_profile, ) from routes.shell_routes import TMUX_LOG_DIR +from src.constants import COOKBOOK_STATE_FILE logger = logging.getLogger(__name__) @@ -38,8 +45,10 @@ from routes.cookbook_helpers import ( _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase, _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines, _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script, - _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, - _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain, + _pip_install_no_cache, 
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _append_pip_install_runner_lines, + _diagnose_serve_output, run_ssh_command_async, ModelDownloadRequest, ServeRequest, ) @@ -54,7 +63,7 @@ _HF_TOKEN_STATUS_SNIPPET = ( def setup_cookbook_routes() -> APIRouter: router = APIRouter(tags=["cookbook"]) - _cookbook_state_path = Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json" + _cookbook_state_path = Path(COOKBOOK_STATE_FILE) def _mask_secret(value: str) -> str: if not value: @@ -81,127 +90,6 @@ def setup_cookbook_routes() -> APIRouter: task["payload"].pop("hf_token", None) return state - def _diagnose_serve_output(text: str) -> dict | None: - """Server-side mirror of the Cookbook UI's common serve diagnoses. - - The browser uses cookbook-diagnosis.js for clickable fixes. This gives - the agent/tool path the same structured signal so it can retry with an - adjusted command instead of guessing from raw tmux output. - """ - if not text: - return None - tail = text[-6000:] - patterns = [ - ( - r"No available memory for the cache blocks|Available KV cache memory:.*-", - "No GPU memory left for KV cache after loading model.", - [ - {"label": "retry with GPU memory utilization 0.95", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.95"}, - {"label": "retry with context 2048", "op": "replace", "flag": "--max-model-len", "value": "2048"}, - ], - ), - ( - r"CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory|warming up sampler|max_num_seqs.*gpu_memory_utilization", - "GPU ran out of memory during startup or warmup.", - [ - {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, - {"label": "retry with GPU memory utilization 0.80", "op": "replace", "flag": "--gpu-memory-utilization", "value": "0.80"}, - {"label": "retry with --enforce-eager", "op": "append", "arg": "--enforce-eager"}, - ], - ), - ( - r"not divisib|must be divisible|attention heads.*divisible", - "Tensor 
parallel size is incompatible with the model.", - [ - {"label": "retry with tensor parallel size 1", "op": "replace", "flag": "--tensor-parallel-size", "value": "1"}, - {"label": "retry with tensor parallel size 2", "op": "replace", "flag": "--tensor-parallel-size", "value": "2"}, - ], - ), - ( - r"KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context", - "Context length is too large for available GPU memory.", - [ - {"label": "retry with context 8192", "op": "replace", "flag": "--max-model-len", "value": "8192"}, - {"label": "retry with context 4096", "op": "replace", "flag": "--max-model-len", "value": "4096"}, - ], - ), - ( - r"enable-auto-tool-choice requires --tool-call-parser", - "Auto tool choice requires an explicit tool call parser.", - [{"label": "retry with Hermes tool parser", "op": "append", "arg": "--tool-call-parser hermes"}], - ), - ( - r"Please pass.*trust.remote.code=True|contains custom code which must be executed to correctly load|does not recognize this architecture|model type.*but Transformers does not", - "Model requires custom code or newer model support.", - [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], - ), - ( - r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", - "vLLM/Transformers kernel package mismatch.", - [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}], - ), - ( - r"Address already in use|bind.*address.*in use", - "Port is already in use.", - [{"label": "retry on port 8001", "op": "replace", "flag": "--port", "value": "8001"}], - ), - ( - r"No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid", - "No GPUs are visible to the serve process.", - [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}], - ), - ( - r"Failed to infer device type|NVML Shared Library 
Not Found|No module named 'amdsmi'|platform is not available", - "vLLM could not find a supported GPU (CUDA or ROCm). " - "This machine may have integrated or unsupported graphics only.", - [ - {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"}, - {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"}, - ], - ), - ( - r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed", - "vLLM is not installed or not in PATH on this server.", - [{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}], - ), - ( - r"sglang.*command not found|No module named sglang|SGLang is not installed", - "SGLang is not installed or not in PATH on this server.", - [{"label": "install SGLang in Cookbook Dependencies", "op": "dependency", "package": "sglang[all]"}], - ), - ( - r"llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'|git: command not found|cmake: command not found", - "llama.cpp / llama-cpp-python dependencies are missing.", - [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}], - ), - ( - r"No GGUF found on this host|no \.gguf file|No GGUF file found", - "No GGUF file found for this model on this host. 
The llama.cpp backend needs a .gguf file.", - [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}], - ), - ( - r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers", - "Diffusion serving requires PyTorch and diffusers.", - [{"label": "install diffusers[torch] in Cookbook Dependencies", "op": "dependency", "package": "diffusers[torch]"}], - ), - ( - r"403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review", - "Model access is gated or unauthorized.", - [{"label": "set HF token and request model access on HuggingFace", "op": "manual"}], - ), - ] - for pattern, message, suggestions in patterns: - if re.search(pattern, tail, re.I): - return {"message": message, "suggestions": suggestions} - if re.search(r"Traceback \(most recent call last\)", tail, re.I) and not re.search( - r"Application startup complete|GET /v1/|Uvicorn running on", tail, re.I - ): - return { - "message": "Python traceback detected during serve startup.", - "suggestions": [{"label": "inspect traceback and retry with adjusted backend/settings", "op": "manual"}], - } - return None - def _state_for_client(state): """Return cookbook state without raw secrets for browser clients.""" _strip_task_secrets(state) @@ -295,6 +183,7 @@ def setup_cookbook_routes() -> APIRouter: safe_chmod(key_path.with_suffix(".pub"), 0o644) return {"ok": True, "public_key": _read_cookbook_public_key()} + def _needs_binary(cmd: str, binary: str) -> bool: return bool(re.search(rf"(^|[\s;&|()]){re.escape(binary)}($|[\s;&|()])", cmd or "")) @@ -355,8 +244,8 @@ def setup_cookbook_routes() -> APIRouter: # POSIX form + shell-quoting so drive paths / spaces survive. 
inner = TMUX_LOG_DIR / f"{session_id}_run.sh" inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8") - lp = shlex.quote(log_path.as_posix()) - ip = shlex.quote(inner.as_posix()) + lp = shlex.quote(git_bash_path(log_path)) + ip = shlex.quote(git_bash_path(inner)) script_path = TMUX_LOG_DIR / f"{session_id}.sh" script_path.write_text( f"bash {ip} > {lp} 2>&1\n", @@ -472,6 +361,8 @@ def setup_cookbook_routes() -> APIRouter: ps_lines = [] ps_lines.append('$sessionDir = "$env:TEMP\\odysseus-sessions"') ps_lines.append('New-Item -ItemType Directory -Force -Path $sessionDir | Out-Null') + ps_lines.append('$env:PYTHONIOENCODING = "utf-8"') + ps_lines.append('$env:PYTHONUTF8 = "1"') if req.hf_token: ps_lines.append(f"$env:HF_TOKEN = '{_ps_squote(req.hf_token)}'") if req.env_prefix: @@ -545,7 +436,7 @@ def setup_cookbook_routes() -> APIRouter: # Install hf CLI + optional hf_transfer best-effort. Retries disable # hf_transfer because the Rust parallel path is fast but has been # flaky near the end of very large multi-file downloads. - # Use --break-system-packages on PEP-668 systems (Arch, newer Debian) so it doesn't bail. + # The helper tries active pip first, then guarded user-site fallbacks. 
runner_lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', python_cmd='pip', upgrade=True)}") if req.disable_hf_transfer: runner_lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0") @@ -673,24 +564,35 @@ def setup_cookbook_routes() -> APIRouter: for d in model_dir.split(','): d = d.strip() if d: - model_dirs.append(d) - paths_code = _cached_model_scan_script(model_dirs) + translated_d = translate_path(d) if not host else d + model_dirs.append(translated_d) + win_hf_hub = None + if not host: + win_profile = get_wsl_windows_user_profile() + win_hf_hub = os.path.join(win_profile, ".cache", "huggingface", "hub") if win_profile else None + + paths_code = _cached_model_scan_script(model_dirs, win_hf_hub) scan_py = TMUX_LOG_DIR / "scan_cache.py" scan_py.write_text(paths_code, encoding="utf-8") + scan_payload = scan_py.read_bytes() if host: - _pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" if platform == "windows": - # Windows: use 'python' and pipe via stdin with double-quote wrapping - cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\'' + remote_cmd = "python -" else: - cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'" - proc = await asyncio.create_subprocess_shell( - cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(Path.home()), + # POSIX: use 'python3' if available, fall back to 'python'; throw if neither is found. 
+ remote_cmd = ( + "if command -v python3 >/dev/null 2>&1; then python3 -; " + "elif command -v python >/dev/null 2>&1; then python -; " + "else echo \"python3/python not found\" >&2; exit 127; fi" + ) + rc, stdout_b, stderr_b = await run_ssh_command_async( + host, + ssh_port, + remote_cmd, + timeout=60, + stdin_data=scan_payload, ) else: # LOCAL scan: use sys.executable (the venv Python Odysseus is already @@ -710,7 +612,7 @@ def setup_cookbook_routes() -> APIRouter: stderr=asyncio.subprocess.PIPE, cwd=str(Path.home()), ) - stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60) + stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60) models = [] try: @@ -915,6 +817,10 @@ def setup_cookbook_routes() -> APIRouter: existing.name = display_name if supports_tools is not None: existing.supports_tools = supports_tools + # Wipe stale model lists so the picker re-probes and discovers + # the newly-served model instead of showing the old one. + existing.cached_models = None + existing.hidden_models = None db.commit() logger.info(f"Updated existing local model endpoint: {base_url}") return existing.id @@ -971,11 +877,27 @@ def setup_cookbook_routes() -> APIRouter: in_venv=sys.prefix != sys.base_prefix, ) is_pip_install = bool(req.cmd and "pip install" in req.cmd) + remote = req.remote_host + is_windows = req.platform == "windows" + local_windows = IS_WINDOWS and not remote + if is_windows or local_windows: + if req.cmd.startswith("python3 "): + req.cmd = "python " + req.cmd[len("python3 "):] + if is_pip_install and ("llama-cpp-python" in req.cmd or "llama_cpp" in req.cmd) and (is_windows or local_windows): + if "--extra-index-url" not in req.cmd: + req.cmd += " --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu" + if is_pip_install: # Keep big dependency wheel builds (vLLM, …) off the home filesystem's # pip cache so they don't fail mid-build with "No space left" (#1219) # and leave the dep installed-but-unusable 
(#1459). req.cmd = _pip_install_no_cache(req.cmd) + # Accept common aliases and enforce server extras for llama-cpp so + # `python -m llama_cpp.server` has all runtime dependencies. + req.cmd = re.sub(r"(?=!~,` for version specifiers. # v2 review HIGH-14: tightened from the previous regex which @@ -1028,6 +950,8 @@ def setup_cookbook_routes() -> APIRouter: ps_lines = [] ps_lines.append('$sessionDir = "$env:TEMP\\odysseus-sessions"') ps_lines.append('New-Item -ItemType Directory -Force -Path $sessionDir | Out-Null') + ps_lines.append('$env:PYTHONIOENCODING = "utf-8"') + ps_lines.append('$env:PYTHONUTF8 = "1"') if req.hf_token: ps_lines.append(f"$env:HF_TOKEN = '{_ps_squote(req.hf_token)}'") if req.gpus: @@ -1046,7 +970,7 @@ def setup_cookbook_routes() -> APIRouter: ps_lines.append('try { python -c "import llama_cpp" 2>$null } catch {}') ps_lines.append('if ($LASTEXITCODE -ne 0) {') ps_lines.append(' Write-Host "Installing llama-cpp-python..."') - ps_lines.append(' python -m pip install llama-cpp-python[server]') + ps_lines.append(' python -m pip install llama-cpp-python[server] --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu') ps_lines.append('}') elif "vllm" in req.cmd: ps_lines.append('Write-Host "ERROR: vLLM is not supported on Windows. Use Ollama or llama.cpp instead."') @@ -1121,45 +1045,57 @@ def setup_cookbook_routes() -> APIRouter: # ollama is found (otherwise macOS falls back to a slow source build). # /opt/homebrew = Apple Silicon, /usr/local = Intel; harmless on Linux. runner_lines.append('export PATH="$HOME/.local/bin:$HOME/bin:$HOME/llama.cpp/build/bin:/opt/homebrew/bin:/usr/local/bin:$PATH"') - runner_lines.append('if [ -d /data/data/com.termux ]; then') - runner_lines.append(' # Termux: no native build — use the Python bindings (CPU).') - runner_lines.append(' if ! 
python3 -c "import llama_cpp" 2>/dev/null; then') - runner_lines.append(' pkg install -y cmake 2>/dev/null') - runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') - runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true') - runner_lines.append(' fi') - runner_lines.append('elif ! command -v llama-server &>/dev/null; then') - runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') - runner_lines.append(' mkdir -p ~/bin') - runner_lines.append(' cd ~ && [ -d llama.cpp ] || git clone --depth 1 https://github.com/ggml-org/llama.cpp') - # Build with the right accelerator: Metal on macOS (llama.cpp - # enables it automatically, no flag), CUDA on Linux when present, - # else a plain CPU build. nproc is Linux-only — fall back to - # `sysctl hw.ncpu` on macOS. (Tip: `brew install llama.cpp` ships - # a prebuilt llama-server and skips this whole source build.) - runner_lines.append(' NPROC="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"') - runner_lines.append(' if [ "$(uname -s)" = "Darwin" ]; then') - runner_lines.append(' command -v cmake >/dev/null 2>&1 || echo "WARNING: cmake not found — install it with: brew install cmake (or: brew install llama.cpp for a prebuilt llama-server)."') - # Start from a clean cache: a prior failed configure (e.g. a CUDA - # attempt) poisons build/CMakeCache.txt, so a plain `cmake -B build` - # would reuse the bad settings and fail again. CMAKE_BUILD_TYPE is - # explicit so the binary is optimized (Metal auto-enables on macOS). 
- runner_lines.append(' cd ~/llama.cpp && rm -rf build && cmake -B build -DCMAKE_BUILD_TYPE=Release \\') - runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') - runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') - runner_lines.append(' else') - _append_llama_cpp_linux_accel_build_lines(runner_lines) - runner_lines.append(' fi') - runner_lines.append(' # If the native build failed, fall back to the Python bindings.') - runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') - runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') - runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true") - runner_lines.append(' fi') - runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') - runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."') - runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') - runner_lines.append(' fi') - runner_lines.append('fi') + if local_windows: + # LOCAL Windows: no native source compilation (no cmake/compiler on Git Bash). + # Just check python bindings (using native `python` binary) and fall back to pip install. + runner_lines.append('if ! command -v llama-server &>/dev/null && ! python -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "llama-server not found — installing Python bindings..."') + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='python')} || true") + runner_lines.append('fi') + runner_lines.append('if ! command -v llama-server &>/dev/null && ! 
python -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install attempts."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('fi') + else: + runner_lines.append('if [ -d /data/data/com.termux ]; then') + runner_lines.append(' # Termux: no native build — use the Python bindings (CPU).') + runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' pkg install -y cmake 2>/dev/null') + runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') + runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true') + runner_lines.append(' fi') + runner_lines.append('elif ! command -v llama-server &>/dev/null; then') + runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') + runner_lines.append(' mkdir -p ~/bin') + runner_lines.append(' cd ~ && [ -d llama.cpp ] || git clone --depth 1 https://github.com/ggml-org/llama.cpp') + # Build with the right accelerator: Metal on macOS (llama.cpp + # enables it automatically, no flag), CUDA on Linux when present, + # else a plain CPU build. nproc is Linux-only — fall back to + # `sysctl hw.ncpu` on macOS. (Tip: `brew install llama.cpp` ships + # a prebuilt llama-server and skips this whole source build.) + runner_lines.append(' NPROC="$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)"') + runner_lines.append(' if [ "$(uname -s)" = "Darwin" ]; then') + runner_lines.append(' command -v cmake >/dev/null 2>&1 || echo "WARNING: cmake not found — install it with: brew install cmake (or: brew install llama.cpp for a prebuilt llama-server)."') + # Start from a clean cache: a prior failed configure (e.g. 
a CUDA + # attempt) poisons build/CMakeCache.txt, so a plain `cmake -B build` + # would reuse the bad settings and fail again. CMAKE_BUILD_TYPE is + # explicit so the binary is optimized (Metal auto-enables on macOS). + runner_lines.append(' cd ~/llama.cpp && rm -rf build && cmake -B build -DCMAKE_BUILD_TYPE=Release \\') + runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\') + runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' else') + _append_llama_cpp_linux_accel_build_lines(runner_lines) + runner_lines.append(' fi') + # If the native build failed, fall back to the Python bindings. + runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true") + runner_lines.append(' fi') + runner_lines.append(' if ! command -v llama-server &>/dev/null && ! 
python3 -c "import llama_cpp" 2>/dev/null; then') + runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append(' fi') + runner_lines.append('fi') elif "ollama" in req.cmd: handled_ollama_serve = True _ollama_default_host = "0.0.0.0" if remote else "127.0.0.1" @@ -1181,13 +1117,23 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_try_port"') runner_lines.append(' break') runner_lines.append(' fi') - runner_lines.append(' exec 3<&-; exec 3>&-') - runner_lines.append('done') + runner_lines.append(' echo "[odysseus] Ollama API ready on port ${ODYSSEUS_OLLAMA_PORT}: ${ODYSSEUS_OLLAMA_URL}"') + runner_lines.append(' echo "[odysseus] This task is monitoring an existing Ollama server; stopping it here will not stop an external Docker/system service."') + if local_windows: + # Windows detached process has no TTY; exec bash -i crashes. + # Keep the monitoring task alive with a sleep loop. + runner_lines.append(' while true; do sleep 60; done') + else: + runner_lines.append(' exec bash -i') + runner_lines.append('fi') runner_lines.append('if ! command -v ollama &>/dev/null; then') runner_lines.append(' echo "ERROR: Ollama not found on this server. 
Install it from https://ollama.com/download or `curl -fsSL https://ollama.com/install.sh | sh`."') runner_lines.append(' echo') runner_lines.append(' echo "=== Process exited with code 127 ==="') - runner_lines.append(' exec bash -i') + if local_windows: + runner_lines.append(' exit 127') + else: + runner_lines.append(' exec bash -i') runner_lines.append('fi') runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"') if remote and _ollama_host in ("0.0.0.0", "::"): @@ -1195,24 +1141,20 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."') runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."') runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve') - runner_lines.append('_ody_exit=$?') - runner_lines.append('echo') - runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="') - runner_lines.append('exec bash -i') + if local_windows: + _append_serve_exit_code_lines(runner_lines, keep_shell_open=False) + else: + runner_lines.append('_ody_exit=$?') + runner_lines.append('echo') + runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="') + runner_lines.append('exec bash -i') elif "vllm serve" in req.cmd: # vLLM is CUDA/ROCm-only and does not run on macOS at all. runner_lines.append('if [ "$(uname -s)" = "Darwin" ]; then') runner_lines.append(' echo "ERROR: vLLM does not run on macOS. Use Ollama or llama.cpp (Metal) instead."') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=1') runner_lines.append('fi') - # Put ~/.local/bin on PATH first — without a venv, vllm installs - # there via --user and the non-login serve shell otherwise can't - # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above. 
- runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! command -v vllm &>/dev/null; then') - runner_lines.append(' echo "ERROR: vLLM is not installed."') - runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') - runner_lines.append('fi') + _append_vllm_linux_preflight_lines(runner_lines) elif "sglang.launch_server" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('if ! command -v sglang &>/dev/null; then') @@ -1236,7 +1178,10 @@ def setup_cookbook_routes() -> APIRouter: runner_lines, keep_shell_open=not local_windows, ) - runner_lines.append(req.cmd) + if is_pip_install: + _append_pip_install_runner_lines(runner_lines, req.cmd) + else: + runner_lines.append(req.cmd) if local_windows: # Detached background process — no interactive shell to keep open. # Print the exit marker the status poller looks for, then stop. @@ -1397,8 +1342,8 @@ def setup_cookbook_routes() -> APIRouter: cmd = f"ssh {pf}{host} '{setup_script}'" else: # Linux: auto-install tmux (via whichever package manager is available) - # and huggingface_hub + hf_transfer (falling back to --user/--break-system-packages - # on PEP-668 locked distros like Arch / newer Debian). + # and huggingface_hub + hf_transfer (falling back to --user, then + # guarded --break-system-packages on PEP-668 locked distros). setup_script = ( # Install tmux if missing — try common package managers; skip if no sudo "if ! command -v tmux >/dev/null 2>&1; then " @@ -1410,10 +1355,15 @@ def setup_cookbook_routes() -> APIRouter: " fi; " "fi; " "command -v tmux >/dev/null 2>&1 || echo 'WARNING: tmux missing and auto-install failed (need passwordless sudo). Install manually.'; " - # Install Python bits. Try system install first; fall back to --user --break-system-packages on PEP 668 systems. + # Install Python bits. Try system install first; fall back to --user, + # then use --break-system-packages only when pip supports it. 
"pip install -q huggingface_hub hf_transfer 2>/dev/null || " - "pip install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null || " - "pip3 install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null; " + "pip install --user -q huggingface_hub hf_transfer 2>/dev/null || " + "( pip install --help 2>/dev/null | grep -q -- --break-system-packages && " + "pip install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null ) || " + "pip3 install --user -q huggingface_hub hf_transfer 2>/dev/null || " + "( pip3 install --help 2>/dev/null | grep -q -- --break-system-packages && " + "pip3 install --user --break-system-packages -q huggingface_hub hf_transfer 2>/dev/null ); " "python3 -c 'from huggingface_hub import snapshot_download; print(\"OK\")'" ) cmd = f"ssh {pf}{host} '{setup_script}'" @@ -1436,11 +1386,38 @@ def setup_cookbook_routes() -> APIRouter: async def _run_nvidia_smi(query: str, host: str | None, ssh_port: str | None, timeout: int = 8): """Run nvidia-smi locally or over SSH. 
Returns (stdout, error_or_None).""" if host: - pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" - cmd = f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no {pf}{host} '{query}'" - proc = await asyncio.create_subprocess_shell( - cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE - ) + candidates = [query] + stripped = query.strip() + if stripped.startswith("nvidia-smi "): + args = stripped[len("nvidia-smi "):] + candidates.append( + "bash -lc " + + shlex.quote( + f"{SSH_PATH_OVERRIDE}" + f"nvidia-smi {args}" + ) + ) + for nvidia_path in NVIDIA_PATH_CANDIDATES: + candidates.append(f"{nvidia_path} {args}") + + last_err = "nvidia-smi failed" + for candidate in candidates: + try: + rc, stdout, stderr = await run_ssh_command_async( + host, + ssh_port, + candidate, + connect_timeout=5, + timeout=timeout, + ) + except asyncio.TimeoutError: + return None, "nvidia-smi timed out" + if rc == 0: + return stdout.decode("utf-8", errors="replace"), None + err = (stderr.decode("utf-8", errors="replace") or "").strip()[:200] + if err: + last_err = err + return None, last_err else: proc = await asyncio.create_subprocess_exec( *shlex.split(query), @@ -2203,7 +2180,13 @@ def setup_cookbook_routes() -> APIRouter: "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));" "sys.exit(0 if ok and not inc else 1)" ) - cmd = ["python3", "-c", py, repo_id] + if remote_host: + cmd = ["python3", "-c", py, repo_id] + else: + # Local Windows: python3 can hit the Microsoft Store stub. Use the + # real Python Odysseus is running under (guaranteed to exist). + import sys as _sys_local + cmd = [_sys_local.executable, "-c", py, repo_id] try: if remote_host: ssh_base = ["ssh"] diff --git a/routes/copilot_routes.py b/routes/copilot_routes.py index bb2b1d21f..1d8be52ce 100644 --- a/routes/copilot_routes.py +++ b/routes/copilot_routes.py @@ -20,39 +20,26 @@ All routes are admin-gated (endpoint/provider management is an admin action). 
""" import json -import time import uuid import logging -import threading from typing import Dict, Optional import httpx -from fastapi import APIRouter, Request, Form, HTTPException +from fastapi import HTTPException, Request from core.database import SessionLocal, ModelEndpoint -from core.middleware import require_admin +from routes.device_flow import ( + DeviceFlowPoll, + DeviceFlowStart, + PendingDeviceFlowStore, + create_device_flow_router, +) from src.auth_helpers import get_current_user from src import copilot logger = logging.getLogger(__name__) -# Pending device-flow logins, keyed by an opaque poll_id. The device_code is a -# bearer-like secret, so it lives here (server memory) rather than in the -# browser. Entries expire with the GitHub device code. -# -# NOTE: this is per-process state. The device flow assumes a single worker -# (Odysseus' default): with multiple uvicorn workers, the poll request can land -# on a worker that never saw the start, returning "Unknown or expired login -# session". Move this to a shared store (DB/Redis) if running multi-worker. 
-_PENDING: Dict[str, Dict] = {} -_PENDING_LOCK = threading.Lock() - - -def _prune_expired() -> None: - now = time.time() - with _PENDING_LOCK: - for k in [k for k, v in _PENDING.items() if v.get("expires_at", 0) < now]: - _PENDING.pop(k, None) +_DEVICE_FLOW_STORE = PendingDeviceFlowStore() def _provision_endpoint(token: str, base: str, owner: Optional[str]) -> Dict: @@ -112,112 +99,75 @@ def _provision_endpoint(token: str, base: str, owner: Optional[str]) -> Dict: return result -def setup_copilot_routes() -> APIRouter: - router = APIRouter(prefix="/api/copilot", tags=["copilot"]) +def _start_device_flow(request: Request, form) -> DeviceFlowStart: + host = copilot.GITHUB_HOST + ent = str(form.get("enterprise_url") or "").strip() + if ent: + host = copilot.normalize_domain(ent) + try: + data = copilot.request_device_code(host) + except httpx.HTTPStatusError as e: + status = e.response.status_code if e.response is not None else "unknown" + raise HTTPException(502, f"GitHub device-code request failed (HTTP {status})") + except Exception as e: + raise HTTPException(502, f"GitHub device-code request failed: {e}") - @router.post("/device/start") - def device_start(request: Request, enterprise_url: str = Form("")): - require_admin(request) - _prune_expired() - host = copilot.GITHUB_HOST - ent = (enterprise_url or "").strip() - if ent: - host = copilot.normalize_domain(ent) - try: - data = copilot.request_device_code(host) - except httpx.HTTPStatusError as e: - status = e.response.status_code if e.response is not None else "unknown" - raise HTTPException(502, f"GitHub device-code request failed (HTTP {status})") - except Exception as e: - raise HTTPException(502, f"GitHub device-code request failed: {e}") + device_code = data.get("device_code") + if not device_code: + raise HTTPException(502, "GitHub did not return a device code") - device_code = data.get("device_code") - if not device_code: - raise HTTPException(502, "GitHub did not return a device code") - interval = 
int(data.get("interval") or 5) - expires_in = int(data.get("expires_in") or 900) - poll_id = uuid.uuid4().hex - with _PENDING_LOCK: - _PENDING[poll_id] = { - "device_code": device_code, - "host": host, - "enterprise_url": ent, - "interval": interval, - "owner": get_current_user(request) or None, - "expires_at": time.time() + expires_in, - "next_poll_at": 0.0, - } - # verification_uri_complete embeds the user code, so the browser tab we - # open lands the user straight on GitHub's "Authorize" screen with the - # code pre-filled — one click, no manual code entry. - return { - "poll_id": poll_id, + # verification_uri_complete embeds the user code, so the browser tab we + # open lands the user straight on GitHub's "Authorize" screen with the + # code pre-filled — one click, no manual code entry. + return DeviceFlowStart( + pending={ + "device_code": device_code, + "host": host, + "enterprise_url": ent, + "owner": get_current_user(request) or None, + }, + response={ "user_code": data.get("user_code"), "verification_uri": data.get("verification_uri"), "verification_uri_complete": data.get("verification_uri_complete"), - "interval": interval, - "expires_in": expires_in, - } + }, + interval=int(data.get("interval") or 5), + expires_in=int(data.get("expires_in") or 900), + ) - @router.post("/device/poll") - def device_poll(request: Request, poll_id: str = Form(...)): - require_admin(request) - _prune_expired() - with _PENDING_LOCK: - pending = _PENDING.get(poll_id) - if not pending: - raise HTTPException(404, "Unknown or expired login session") - # Enforce GitHub's polling interval server-side so a chatty client - # can't trip slow_down. 
- now = time.time() - if now < pending.get("next_poll_at", 0): - return {"status": "pending"} +def _poll_device_flow(_request: Request, pending: Dict) -> DeviceFlowPoll: + try: + data = copilot.poll_access_token(pending["host"], pending["device_code"]) + except Exception as e: + return DeviceFlowPoll.pending(f"poll error: {e}") + token = data.get("access_token") + if token: + base = copilot.enterprise_base(pending["enterprise_url"]) if pending["enterprise_url"] else copilot.COPILOT_BASE try: - data = copilot.poll_access_token(pending["host"], pending["device_code"]) + result = _provision_endpoint(token, base, pending["owner"]) except Exception as e: - return {"status": "pending", "detail": f"poll error: {e}"} + logger.exception("Copilot endpoint provisioning failed") + raise HTTPException(500, f"Login succeeded but provisioning failed: {e}") + return DeviceFlowPoll.authorized(result) - token = data.get("access_token") - if token: - base = copilot.enterprise_base(pending["enterprise_url"]) if pending["enterprise_url"] else copilot.COPILOT_BASE - try: - result = _provision_endpoint(token, base, pending["owner"]) - except Exception as e: - logger.exception("Copilot endpoint provisioning failed") - with _PENDING_LOCK: - _PENDING.pop(poll_id, None) - raise HTTPException(500, f"Login succeeded but provisioning failed: {e}") - with _PENDING_LOCK: - _PENDING.pop(poll_id, None) - return {"status": "authorized", "endpoint": result} + err = data.get("error") + if err == "authorization_pending": + return DeviceFlowPoll.pending() + if err == "slow_down": + return DeviceFlowPoll.slow_down(int(data.get("interval") or 0) or None) + if err in ("expired_token", "access_denied"): + return DeviceFlowPoll.failed(err) + # Unknown error — surface but keep the session for another try. 
+ return DeviceFlowPoll.pending(err or "unknown") - err = data.get("error") - if err == "authorization_pending": - with _PENDING_LOCK: - if poll_id in _PENDING: - _PENDING[poll_id]["next_poll_at"] = now + pending["interval"] - return {"status": "pending"} - if err == "slow_down": - new_interval = int(data.get("interval") or (pending["interval"] + 5)) - with _PENDING_LOCK: - if poll_id in _PENDING: - _PENDING[poll_id]["interval"] = new_interval - _PENDING[poll_id]["next_poll_at"] = now + new_interval - return {"status": "pending"} - if err in ("expired_token", "access_denied"): - with _PENDING_LOCK: - _PENDING.pop(poll_id, None) - return {"status": "failed", "error": err} - # Unknown error — surface but keep the session for another try. - return {"status": "pending", "detail": err or "unknown"} - @router.post("/device/cancel") - def device_cancel(request: Request, poll_id: str = Form(...)): - require_admin(request) - with _PENDING_LOCK: - _PENDING.pop(poll_id, None) - return {"status": "cancelled"} - - return router +def setup_copilot_routes(): + return create_device_flow_router( + prefix="/api/copilot", + tags=["copilot"], + store=_DEVICE_FLOW_STORE, + start_flow=_start_device_flow, + poll_flow=_poll_device_flow, + ) diff --git a/routes/device_flow.py b/routes/device_flow.py new file mode 100644 index 000000000..8b8ab4ac8 --- /dev/null +++ b/routes/device_flow.py @@ -0,0 +1,193 @@ +"""Shared OAuth/device-flow route scaffolding for provider setup.""" + +from __future__ import annotations + +import inspect +import threading +import time +import uuid +from dataclasses import dataclass +from typing import Any, Callable, Iterable, Mapping, Optional + +from fastapi import APIRouter, Form, HTTPException, Request + +from core.middleware import require_admin + + +@dataclass(frozen=True) +class DeviceFlowStart: + """Provider-specific start result consumed by the shared route wrapper.""" + + pending: Mapping[str, Any] + response: Mapping[str, Any] + interval: int = 5 + 
expires_in: int = 900 + + +@dataclass(frozen=True) +class DeviceFlowPoll: + """Normalized provider poll outcome.""" + + status: str + endpoint: Optional[Mapping[str, Any]] = None + error: Optional[str] = None + detail: Optional[str] = None + interval: Optional[int] = None + + @classmethod + def pending(cls, detail: Optional[str] = None) -> "DeviceFlowPoll": + return cls(status="pending", detail=detail) + + @classmethod + def slow_down(cls, interval: Optional[int] = None, detail: Optional[str] = None) -> "DeviceFlowPoll": + return cls(status="slow_down", interval=interval, detail=detail) + + @classmethod + def authorized(cls, endpoint: Mapping[str, Any]) -> "DeviceFlowPoll": + return cls(status="authorized", endpoint=endpoint) + + @classmethod + def failed(cls, error: str) -> "DeviceFlowPoll": + return cls(status="failed", error=error) + + +class PendingDeviceFlowStore: + """Thread-safe in-memory pending device-flow store. + + Device codes and provider-side secrets stay inside this process. Each entry + stores provider payload separately from poll metadata so provider callbacks + only receive the fields they created. 
+ """ + + def __init__(self, *, time_func: Callable[[], float] = time.time): + self._pending: dict[str, dict[str, Any]] = {} + self._lock = threading.Lock() + self._time = time_func + + def _now(self) -> float: + return float(self._time()) + + def prune_expired(self) -> None: + now = self._now() + with self._lock: + for key in [k for k, v in self._pending.items() if v.get("expires_at", 0) < now]: + self._pending.pop(key, None) + + def add(self, payload: Mapping[str, Any], *, interval: int, expires_in: int) -> str: + self.prune_expired() + poll_id = uuid.uuid4().hex + with self._lock: + self._pending[poll_id] = { + "payload": dict(payload), + "interval": max(int(interval or 5), 1), + "expires_at": self._now() + max(int(expires_in or 900), 1), + "next_poll_at": 0.0, + } + return poll_id + + def get_payload(self, poll_id: str) -> Optional[dict[str, Any]]: + self.prune_expired() + with self._lock: + entry = self._pending.get(poll_id) + if entry is None: + return None + return dict(entry.get("payload") or {}) + + def is_throttled(self, poll_id: str) -> bool: + with self._lock: + entry = self._pending.get(poll_id) + return bool(entry and self._now() < float(entry.get("next_poll_at") or 0)) + + def schedule_next(self, poll_id: str) -> None: + now = self._now() + with self._lock: + entry = self._pending.get(poll_id) + if entry is not None: + entry["next_poll_at"] = now + int(entry.get("interval") or 5) + + def slow_down(self, poll_id: str, interval: Optional[int] = None) -> None: + now = self._now() + with self._lock: + entry = self._pending.get(poll_id) + if entry is not None: + new_interval = int(interval or (int(entry.get("interval") or 5) + 5)) + entry["interval"] = max(new_interval, 1) + entry["next_poll_at"] = now + entry["interval"] + + def pop(self, poll_id: str) -> None: + with self._lock: + self._pending.pop(poll_id, None) + + +async def _maybe_await(value: Any) -> Any: + if inspect.isawaitable(value): + return await value + return value + + +def 
_pending_response(detail: Optional[str] = None) -> dict[str, Any]: + response: dict[str, Any] = {"status": "pending"} + if detail: + response["detail"] = detail + return response + + +def create_device_flow_router( + *, + prefix: str, + tags: Iterable[str], + store: PendingDeviceFlowStore, + start_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowStart], + poll_flow: Callable[[Request, Mapping[str, Any]], DeviceFlowPoll], +) -> APIRouter: + """Create standard `/device/start|poll|cancel` routes for a provider.""" + + router = APIRouter(prefix=prefix, tags=list(tags)) + + @router.post("/device/start") + async def device_start(request: Request): + require_admin(request) + form = await request.form() + start = await _maybe_await(start_flow(request, form)) + interval = int(start.interval or 5) + expires_in = int(start.expires_in or 900) + poll_id = store.add(start.pending, interval=interval, expires_in=expires_in) + response = dict(start.response) + response.update({"poll_id": poll_id, "interval": interval, "expires_in": expires_in}) + return response + + @router.post("/device/poll") + async def device_poll(request: Request, poll_id: str = Form(...)): + require_admin(request) + payload = store.get_payload(poll_id) + if payload is None: + raise HTTPException(404, "Unknown or expired login session") + if store.is_throttled(poll_id): + return {"status": "pending"} + + try: + outcome = await _maybe_await(poll_flow(request, payload)) + except Exception: + store.pop(poll_id) + raise + + if outcome.status == "authorized": + store.pop(poll_id) + return {"status": "authorized", "endpoint": dict(outcome.endpoint or {})} + if outcome.status == "failed": + store.pop(poll_id) + return {"status": "failed", "error": outcome.error or "denied"} + if outcome.status == "slow_down": + store.slow_down(poll_id, outcome.interval) + return _pending_response(outcome.detail) + + store.schedule_next(poll_id) + return _pending_response(outcome.detail) + + @router.post("/device/cancel") + def 
device_cancel(request: Request, poll_id: str = Form(...)): + require_admin(request) + store.pop(poll_id) + return {"status": "cancelled"} + + return router diff --git a/routes/document_routes.py b/routes/document_routes.py index aef2a5f68..cb41108e0 100644 --- a/routes/document_routes.py +++ b/routes/document_routes.py @@ -7,14 +7,24 @@ from typing import Dict, Any, List, Optional from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Form -from sqlalchemy import func +from sqlalchemy import case, func, or_ from core.database import SessionLocal, Document, DocumentVersion from core.database import Session as DbSession from src.auth_helpers import get_current_user +from src.constants import MAIL_ATTACHMENTS_DIR logger = logging.getLogger(__name__) +def _get_session_or_404(db, session_id: str, user: Optional[str]): + session = db.query(DbSession).filter(DbSession.id == session_id).first() + if not session: + raise HTTPException(404, "Session not found") + if user and session.owner != user: + raise HTTPException(404, "Session not found") + return session + + def _aggregate_language_facets(lang_rows): """Sum document counts per display language for the library facet. @@ -30,6 +40,19 @@ def _aggregate_language_facets(lang_rows): return out +def _library_language_for_document(doc: Document) -> str: + """Return the display language used by the document library. + + PDF documents are stored as markdown wrappers so the editor can preserve + extracted text, form fields, and annotations. The library should still + identify them as PDFs instead of exposing that internal wrapper format. 
+ """ + from src.pdf_form_doc import find_source_upload_id + + if find_source_upload_id(doc.current_content or ""): + return "pdf" + return doc.language or "text" + from routes.document_helpers import ( DocumentCreate, DocumentUpdate, DocumentPatch, @@ -69,17 +92,12 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: # the doc is owner-stamped, so it lives in the library on its own. session = None if req.session_id: - session = db.query(DbSession).filter(DbSession.id == req.session_id).first() - if not session: - raise HTTPException(404, "Session not found") # Match the lenient ownership model the rest of the app uses # (see _owner_filter): only block when an AUTHENTICATED user is # writing into a DIFFERENT user's session. In single-user / - # unconfigured / localhost-bypass mode the middleware leaves - # current_user unset (None), and those sessions are already - # served freely everywhere else. - if user and session.owner and session.owner != user: - raise HTTPException(403, "Cannot create document in another user's session") + # unconfigured / localhost-bypass mode, falsey users preserve + # the existing lenient path. 
+ session = _get_session_or_404(db, req.session_id, user) doc_id = str(uuid.uuid4()) ver_id = str(uuid.uuid4()) @@ -171,11 +189,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: if session_id: db = SessionLocal() try: - sess = db.query(DbSession).filter(DbSession.id == session_id).first() - if not sess: - raise HTTPException(404, "Session not found") - if user and sess.owner and sess.owner != user: - raise HTTPException(403, "Cannot import into another user's session") + _get_session_or_404(db, session_id, user) finally: db.close() @@ -198,7 +212,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0] try: - body_text = strip_pdf_content_marker(_process_pdf(pdf_path)) + body_text = strip_pdf_content_marker(_process_pdf(pdf_path, owner=user)) except Exception: body_text = None @@ -260,18 +274,29 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter: db = SessionLocal() try: from sqlalchemy import or_ + pdf_marker_cond = or_( + Document.current_content.like('%" + output += "\n\n" return {"output": output, "exit_code": 0} if tool == "web_fetch": @@ -1036,10 +1053,10 @@ async def _direct_fallback( # Accept either a JSON arg ({"url": "..."}) or a plain URL/domain. 
if raw.startswith("{"): try: - parsed = _json.loads(raw) + parsed = json.loads(raw) if isinstance(parsed, dict): url = str(parsed.get("url") or "").strip() - except _json.JSONDecodeError: + except json.JSONDecodeError: url = "" if not url: # Non-JSON (or JSON without a usable url): take the first line @@ -1101,6 +1118,7 @@ async def execute_tool_block( block: Any, session_id: Optional[str] = None, disabled_tools: Optional[set] = None, + tool_policy: Optional[ToolPolicy] = None, owner: Optional[str] = None, progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None, workspace: Optional[str] = None, @@ -1137,8 +1155,7 @@ async def execute_tool_block( # Return a helpful error so the model retries with the correct format. if tool in ("python", "json", "xml") and content.strip().startswith("{") and content.strip().endswith("}"): try: - import json as _json - parsed = _json.loads(content.strip()) + parsed = json.loads(content.strip()) if isinstance(parsed, dict): desc = f"{tool}: misformatted tool call" result = { @@ -1160,6 +1177,12 @@ async def execute_tool_block( pass # Reject tools that the user has disabled for this request + if tool_policy and tool_policy.blocks(tool): + desc = f"{tool}: BLOCKED" + result = {"error": tool_policy.reason_for(tool), "exit_code": 1} + logger.info("Tool blocked by policy: %s", tool) + return desc, result + if disabled_tools and tool in disabled_tools: desc = f"{tool}: BLOCKED" result = {"error": f"Tool '{tool}' is disabled by user.", "exit_code": 1} @@ -1184,6 +1207,87 @@ async def execute_tool_block( logger.warning("Public tool policy blocked owner=%r tool=%s", owner, tool) return desc, result + # ask_user: the agent poses a multiple-choice question to the user to get a + # decision/clarification. This is a pure UI-control marker — no subprocess, + # no filesystem. 
It returns an `ask_user` payload that the agent loop turns + # into an `ask_user` SSE event and then ENDS the turn, so the chat waits for + # the user's selection (their choice arrives as the next message). + if tool == "ask_user": + question, options, multi = "", [], False + raw = (content or "").strip() + try: + parsed = json.loads(raw) if raw else {} + except (ValueError, TypeError): + parsed = {} + if isinstance(parsed, dict): + question = str(parsed.get("question", "")).strip() + multi = bool(parsed.get("multi") or parsed.get("multiSelect")) + for opt in (parsed.get("options") or []): + if isinstance(opt, dict): + label = str(opt.get("label", "")).strip() + descr = str(opt.get("description", "")).strip() + elif isinstance(opt, str): + label, descr = opt.strip(), "" + else: + continue + if label: + options.append({"label": label, "description": descr}) + else: + question = raw + if not question or len(options) < 2: + return "ask_user: invalid", { + "error": ( + "ask_user needs a non-empty `question` and at least 2 `options` " + "(each an object with a `label`, optional `description`)." + ), + "exit_code": 1, + } + options = options[:6] # keep the choice list sane + desc = f"ask_user: {question[:80]}" + labels = ", ".join(o["label"] for o in options) + result = { + "ask_user": {"question": question, "options": options, "multi": multi}, + "output": f"Asked the user: {question}\nOptions: {labels}\nAwaiting their selection.", + "exit_code": 0, + } + logger.info("Tool executed: %s (%d options, multi=%s)", desc, len(options), multi) + return desc, result + + # update_plan: the agent writes back to the active plan — tick an item done + # or revise steps (e.g. when the user asks to change something). Pure UI + # marker: returns a `plan_update` payload the agent loop turns into a + # `plan_update` SSE event; the frontend replaces the stored plan and refreshes + # the docked plan window. Does NOT end the turn. 
+ if tool == "update_plan": + import json as _json + raw = (content or "").strip() + plan = "" + try: + parsed = _json.loads(raw) if raw else {} + except (ValueError, TypeError): + parsed = {} + if isinstance(parsed, dict) and parsed.get("plan"): + plan = str(parsed.get("plan", "")).strip() + else: + # Plain-string call (raw checklist) or JSON without a usable `plan`. + plan = raw + if not plan: + return "update_plan: invalid", { + "error": "update_plan needs a non-empty `plan` (the full updated checklist as markdown).", + "exit_code": 1, + } + plan = plan[:8192] + done = plan.count("- [x]") + plan.count("- [X]") + total = done + plan.count("- [ ]") + desc = f"update_plan: {done}/{total} done" if total else "update_plan" + result = { + "plan_update": {"plan": plan}, + "output": f"Plan updated ({done}/{total} steps complete)." if total else "Plan updated.", + "exit_code": 0, + } + logger.info("Tool executed: %s", desc) + return desc, result + # Background execution: a `bash` block whose first line is the `#!bg` # marker runs DETACHED — returns a job id immediately so the chat stream # isn't held open for a multi-minute install/ffmpeg/download. The always-on diff --git a/src/tool_implementations.py b/src/tool_implementations.py index dbaf50c2d..548f6f0f5 100644 --- a/src/tool_implementations.py +++ b/src/tool_implementations.py @@ -12,19 +12,9 @@ import os import re from typing import Any, Dict, List, Optional -MAX_OUTPUT_CHARS = 10_000 -MAX_READ_CHARS = 20_000 - - -def get_mcp_manager(): - from src import agent_tools - return agent_tools.get_mcp_manager() - - -def _truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str: - if len(text) > limit: - return text[:limit] + f"\n... 
(truncated, {len(text)} chars total)" - return text +from src.constants import MAX_READ_CHARS, DEEP_RESEARCH_DIR, VAULT_FILE +from src.tool_utils import get_mcp_manager +from core.constants import internal_api_base logger = logging.getLogger(__name__) @@ -549,7 +539,7 @@ async def do_suggest_document(content: str, doc_id: str = None, owner: Optional[ # --------------------------------------------------------------------------- async def do_search_chats(query: str, limit: int = 20, owner: str | None = None) -> Dict: - """Search past chat messages for the calling user's sessions only. + """Search past session transcripts for the calling user's sessions only. Without an owner filter this used to leak EVERY user's chat history into the agent's `search_chats` results (v2 review HIGH-11). The @@ -557,63 +547,36 @@ async def do_search_chats(query: str, limit: int = 20, owner: str | None = None) through; legacy callers without owner pass through as before but will only see legacy/null-owner rows. """ - from src.database import SessionLocal, ChatMessage as DBChatMessage, Session as DBSession - # Escape LIKE wildcards in the user-supplied query so a stray % or _ - # doesn't widen the match (and to keep the response deterministic). - safe_q = query.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") - db = SessionLocal() try: - q = ( - db.query(DBChatMessage, DBSession.id, DBSession.name) - .join(DBSession, DBChatMessage.session_id == DBSession.id) - .filter( - DBSession.archived == False, - DBChatMessage.content.ilike(f"%{safe_q}%", escape="\\"), - DBChatMessage.role.in_(["user", "assistant"]), - ) - ) - if owner is not None: - # Restrict to this user's sessions plus legacy null-owner - # rows (so single-user upgrades keep seeing their own data). 
- q = q.filter((DBSession.owner == owner) | (DBSession.owner.is_(None))) - rows = q.order_by(DBChatMessage.timestamp.desc()).limit(limit).all() + from src.session_search import search_session_messages - if not rows: + results = search_session_messages(query, limit=limit, owner=owner) + if not results: return {"results": f"No chats found matching \"{query}\"."} # Group by session to avoid duplicate links seen_sessions = {} - for msg, session_id, session_name in rows: - if session_id not in seen_sessions: - content = msg.content or "" - lower_content = content.lower() - idx = lower_content.find(query.lower()) - if idx == -1: - snippet = content[:150] - else: - start = max(0, idx - 60) - end = min(len(content), idx + len(query) + 60) - snippet = ("..." if start > 0 else "") + content[start:end] + ("..." if end < len(content) else "") - seen_sessions[session_id] = { - "name": session_name or "Untitled", - "snippet": snippet, - "role": msg.role, - "timestamp": msg.timestamp.isoformat() if msg.timestamp else None, - } + for result in results: + if result.session_id not in seen_sessions: + seen_sessions[result.session_id] = result lines = [f"Found {len(seen_sessions)} session(s) matching \"{query}\":\n"] - for sid, info in seen_sessions.items(): - lines.append(f"- **{info['name']}** (#{sid})") + for sid, result in seen_sessions.items(): + lines.append(f"- **{result.session_name}** (#{sid})") lines.append(f" Link: [Open chat](#{sid})") - lines.append(f" > {info['snippet']}") + lines.append(f" Match ({result.role}): {result.content_snippet}") + if result.context_before: + before = result.context_before[-1] + lines.append(f" Before ({before['role']}): {before['content'][:180]}") + if result.context_after: + after = result.context_after[0] + lines.append(f" After ({after['role']}): {after['content'][:180]}") lines.append("") return {"results": "\n".join(lines)} except Exception as e: logger.error(f"search_chats failed: {e}") return {"error": str(e), "exit_code": 1} - finally: 
- db.close() # --------------------------------------------------------------------------- @@ -1566,6 +1529,8 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict: "image gen": "image_gen_enabled", "image generation": "image_gen_enabled", "reminder channel": "reminder_channel", "reminders": "reminder_channel", "ntfy topic": "reminder_ntfy_topic", + "webhook integration": "reminder_webhook_integration_id", + "webhook template": "reminder_webhook_payload_template", "webhook payload": "reminder_webhook_payload_template", "agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls", "agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds", "token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget", @@ -1581,7 +1546,7 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict: _ENUMS = { "image_quality": ["low", "medium", "high"], - "reminder_channel": ["browser", "email", "ntfy"], + "reminder_channel": ["browser", "email", "ntfy", "webhook"], } def _coerce(value, default): if isinstance(default, bool): @@ -1854,6 +1819,22 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: text = re.sub(r"^\s*reminder\s*:\s*", "", text) return re.sub(r"\s+", " ", text) + def _note_visible_to_owner(note, owner_value: Optional[str]) -> bool: + # Empty owner_value is single-user / auth-disabled mode. A real + # authenticated owner must match exactly; null/empty legacy rows are not + # shared between accounts. 
+ if not owner_value: + return True + return getattr(note, "owner", None) == owner_value + + def _note_by_prefix(note_id: str): + if not note_id: + return None + q = db.query(Note).filter(Note.id.startswith(note_id)) + if owner: + q = q.filter(Note.owner == owner) + return q.first() + try: if action == "list": q = db.query(Note) @@ -1973,10 +1954,10 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: elif action == "update": note_id = args.get("id", "") - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not _note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} for field in ("title", "content", "note_type", "color", "label"): if field in args and args[field] is not None: @@ -2009,10 +1990,10 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: elif action == "delete": note_id = args.get("id", "") - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not _note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} title = note.title db.delete(note) @@ -2022,10 +2003,10 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict: elif action == "toggle_item": note_id = args.get("id", "") index = args.get("index", 0) - note = db.query(Note).filter(Note.id.startswith(note_id)).first() if note_id else None + note = _note_by_prefix(note_id) if not note: return {"error": f"Note '{note_id}' not found", "exit_code": 1} - if owner is not None and note.owner and note.owner != owner: + if not 
_note_visible_to_owner(note, owner): return {"error": "Note not found", "exit_code": 1} if not note.items: return {"error": "Note has no checklist items", "exit_code": 1} @@ -2137,6 +2118,13 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: """Parse agent event datetimes in the user's timezone when available.""" return _parse_dt_pair(parse_due_for_user(raw)) + def _first_nonempty_arg(*names: str): + for name in names: + value = args.get(name) + if value not in (None, ""): + return value + return None + def _create_calendar_reminder(summary: str, location: str, dtstart: datetime, all_day: bool, minutes_before: int, is_utc: bool = False) -> tuple[Optional[str], Optional[str]]: @@ -2194,12 +2182,18 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: elif action == "list_events": try: - if args.get("start"): - start_dt = _parse_dt(args["start"]) + start_raw = _first_nonempty_arg( + "start", "start_date", "range_start", "from", "dtstart", "since" + ) + end_raw = _first_nonempty_arg( + "end", "end_date", "range_end", "to", "dtend", "until" + ) + if start_raw: + start_dt = _parse_dt(start_raw) else: start_dt = datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0) - if args.get("end"): - end_dt = _parse_dt(args["end"]) + if end_raw: + end_dt = _parse_dt(end_raw) else: end_dt = start_dt + timedelta(days=14) except ValueError as e: @@ -2489,10 +2483,12 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict: # ── Cookbook tools ── -# Cookbook routes loopback. The agent's tool calls run in-process but -# need to reach admin-gated cookbook routes; we ride the per-process -# internal token so require_admin lets us through. See core/middleware.py. -_COOKBOOK_BASE = "http://localhost:7000" +# In-process loopback base for agent tools that call Odysseus's own API +# (cookbook state, model serve, gallery, email, calendar). 
We ride the +# per-process internal token so require_admin lets us through. See +# core/middleware.py. Resolution (override / APP_PORT / 7000) lives in +# core.constants.internal_api_base(). +_INTERNAL_BASE = internal_api_base() def _internal_headers(owner: Optional[str] = None) -> Dict[str, str]: @@ -2511,7 +2507,7 @@ async def _cookbook_servers() -> Dict[str, Any]: import httpx try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=_internal_headers()) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception: return {"default_host": "", "hosts": []} @@ -2577,7 +2573,7 @@ async def _cookbook_env_for_host(host: str) -> Dict[str, Any]: state: Dict[str, Any] = {} try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: logger.debug(f"cookbook env lookup failed for host={host!r}: {e}") @@ -2637,7 +2633,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, headers = _internal_headers() try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: logger.debug(f"cookbook state read failed: {e}") @@ -2659,7 +2655,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, placeholder = ( f"Launched via agent — waiting for tmux output…\n" f" session: {session_id}\n" - f" target: 
{target}{cmd.split()[0] if cmd else ''}\n" + f" target: {target}{(cmd.split() or [''])[0] if cmd else ''}\n" f" cmd: {cmd[:200]}{'…' if len(cmd) > 200 else ''}" ) tasks.append({ @@ -2681,7 +2677,7 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, state["tasks"] = tasks try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + r = await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, headers=headers) return r.status_code < 400 except Exception as e: @@ -2690,26 +2686,32 @@ async def _cookbook_register_task(session_id: str, model: str, host: str, # Paths the generic `app_api` tool will refuse to call. Auth/token/user -# administration is too risky to route through an agent surface even -# when the agent is admin-context — accidental "delete account" -# style mistakes have permanent blast radius. +# administration and host shell execution are too risky to route through an +# agent surface even when the agent is admin-context; accidental account or +# command mistakes have permanent blast radius. _APP_API_BLOCKLIST_PREFIXES = ( "/api/auth", # login/logout/password "/api/users", # user CRUD (bare /api/users list+create+delete must also block) "/api/tokens", # api token mgmt (bare /api/tokens list+create must also block) "/api/admin", # admin one-shots (wipe etc.) + "/api/shell", # host shell execution must stay behind named command tooling "/api/backup/restore", # destructive restore ) # (method, prefix) pairs to refuse specifically. Used for endpoints -# where GET is fine but writes are destructive — saw the agent wipe -# cookbook_state.json (presets + tasks) by POSTing {"tasks": []} to -# /api/cookbook/state, which overwrote the whole file. Use the -# dedicated preset/task tools instead. +# where GET is fine but writes are destructive or host-control shaped. 
+# Saw the agent wipe cookbook_state.json (presets + tasks) by POSTing +# {"tasks": []} to /api/cookbook/state, which overwrote the whole file. +# Use dedicated tools or UI flows instead. _APP_API_BLOCKLIST_METHOD_PATH = ( ("GET", "/api/email/accounts"), # owner-filtered in tool context; use list_email_accounts MCP tool ("POST", "/api/cookbook/state"), # whole-file overwrite — agent must use serve_preset/serve_model instead ("DELETE", "/api/cookbook/state"), + # Host-control routes: package install, engine rebuild, and process + # signalling should not be reachable through the generic API bridge. + ("POST", "/api/cookbook/packages/install"), + ("POST", "/api/cookbook/rebuild-engine"), + ("POST", "/api/cookbook/kill-pid"), # Use the named tools (download_model / serve_model) — they handle # host-name resolution, per-host env_prefix, AND register the task # in cookbook state so it shows in the UI + list_downloads. Hitting @@ -2734,7 +2736,7 @@ _APP_API_BLOCKLIST_METHOD_PATH = ( async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: - """Generic loopback to any internal Odysseus API endpoint. Lets the + """Generic loopback to allowed internal Odysseus API endpoints. Lets the agent reach the full UI-button surface (cookbook, email, notes, calendar, skills, sessions, gallery, research, etc.) without us landing a named tool wrapper for every one. @@ -2748,7 +2750,8 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: The `endpoints` action returns the OpenAPI surface (method + path + summary) so the agent can discover what's reachable. A blocklist - refuses auth/user/admin paths to keep blast radius bounded. + refuses sensitive auth/user/admin/shell paths and method-specific + host-control routes to keep blast radius bounded. 
""" import httpx try: @@ -2757,7 +2760,7 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: return {"error": "Invalid JSON arguments", "exit_code": 1} action = (args.get("action") or "call").lower() - base = _COOKBOOK_BASE + base = _INTERNAL_BASE if action == "endpoints": # Fetch FastAPI's OpenAPI schema so the agent can discover any @@ -2808,7 +2811,7 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: if not path.startswith("/"): path = "/" + path if any(path.startswith(p) for p in _APP_API_BLOCKLIST_PREFIXES): - return {"error": f"Path blocked for safety: {path}. Auth/user/admin endpoints are off-limits via app_api.", "exit_code": 1} + return {"error": f"Path blocked for safety: {path}. Sensitive endpoints are off-limits via app_api.", "exit_code": 1} method = (args.get("method") or "GET").upper() if method not in ("GET", "POST", "PUT", "PATCH", "DELETE"): @@ -2816,6 +2819,12 @@ async def do_app_api(content: str, owner: Optional[str] = None) -> Dict: if any(method == m and path.startswith(p) for m, p in _APP_API_BLOCKLIST_METHOD_PATH): if "/api/email/accounts" in path: return {"error": "Don't use /api/email/accounts via app_api — it is owner-filtered in tool context and may return empty. Use the `list_email_accounts` email tool, then pass `account` to list_emails/read_email.", "exit_code": 1} + if "/api/cookbook/packages/install" in path: + return {"error": "Don't POST /api/cookbook/packages/install via app_api — package installation is host code execution. Use the dedicated Cookbook dependency UI/flow instead.", "exit_code": 1} + if "/api/cookbook/rebuild-engine" in path: + return {"error": "Don't POST /api/cookbook/rebuild-engine via app_api — engine rebuild mutates local or remote host state. Use the dedicated Cookbook UI/flow instead.", "exit_code": 1} + if "/api/cookbook/kill-pid" in path: + return {"error": "Don't POST /api/cookbook/kill-pid via app_api — process signalling is host control. 
Use the dedicated Cookbook stop/diagnostic flow instead.", "exit_code": 1} if "/api/model/download" in path: return {"error": "Don't POST /api/model/download directly — use the `download_model` tool (it resolves the server name, sets the venv env_prefix, and registers the task so it shows in the UI).", "exit_code": 1} if "/api/model/serve" in path: @@ -3012,7 +3021,7 @@ async def do_download_model(content: str, owner: Optional[str] = None) -> Dict: if env_cfg.get("ssh_port"): payload["ssh_port"] = env_cfg["ssh_port"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/download", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/download", json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -3088,7 +3097,7 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict: if env_cfg.get("ssh_port"): payload["ssh_port"] = env_cfg["ssh_port"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/serve", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/serve", json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -3128,7 +3137,7 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di cookbook_tasks: List[Dict[str, Any]] = [] try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/tasks/status", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/tasks/status", headers=_internal_headers()) cookbook_tasks = (resp.json() or {}).get("tasks") or [] except Exception as e: @@ -3247,7 +3256,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", state: Dict[str, Any] = {} try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + resp = await 
client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = resp.json() or {} except Exception as e: logger.debug(f"cookbook state lookup failed for {session_id}: {e}") @@ -3276,7 +3285,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + resp = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": cmd}, headers=headers) if resp.status_code >= 400: return {"error": f"shell/exec returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1} @@ -3297,7 +3306,7 @@ async def _cookbook_kill_session(session_id: str, *, remote_host: str = "", try: matched["status"] = "stopped" async with httpx.AsyncClient(timeout=10) as client: - await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, headers=headers) except Exception as e: logger.debug(f"failed to mark {session_id} stopped in state: {e}") @@ -3360,7 +3369,7 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic state: Dict[str, Any] = {} try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = resp.json() or {} except Exception as e: logger.debug(f"cookbook state lookup failed for {session_id}: {e}") @@ -3398,7 +3407,7 @@ async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dic host_label = "local" try: async with httpx.AsyncClient(timeout=20) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + resp = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": cmd}, headers=headers) if resp.status_code >= 400: return {"error": f"shell/exec returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 
1} @@ -3449,7 +3458,7 @@ async def do_list_downloads(content: str, owner: Optional[str] = None) -> Dict: import httpx try: async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/tasks/status", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/tasks/status", headers=_internal_headers()) data = resp.json() tasks = [t for t in data.get("tasks", []) if (t.get("type") or "").lower() == "download"] @@ -3500,7 +3509,7 @@ async def do_search_hf_models(content: str, owner: Optional[str] = None) -> Dict params["limit"] = str(limit) try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/hf-latest", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/hf-latest", params=params, headers=_internal_headers()) data = resp.json() models = data.get("models") if isinstance(data, dict) else data @@ -3566,7 +3575,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di check = f"tmux has-session -t {shlex.quote(sess)} 2>&1" try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + r = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": check}, headers=headers) data = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} if r.status_code >= 400 or (data.get("exit_code") not in (None, 0)): @@ -3583,7 +3592,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di server_up = False try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec", + r = await client.post(f"{_INTERNAL_BASE}/api/shell/exec", json={"command": health_cmd}, headers=headers) body = (r.json() or {}).get("stdout", "") if r.headers.get("content-type", "").startswith("application/json") else "" server_up = '"data"' in body or '"object"' in body @@ -3594,7 +3603,7 @@ async def 
do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di # overwrite the whole file (that'd nuke presets). try: async with httpx.AsyncClient(timeout=10) as client: - r = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + r = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = r.json() if r.headers.get("content-type", "").startswith("application/json") else {} except Exception as e: return {"error": f"could not read cookbook state: {e}", "exit_code": 1} @@ -3630,7 +3639,7 @@ async def do_adopt_served_model(content: str, owner: Optional[str] = None) -> Di state["tasks"] = tasks try: async with httpx.AsyncClient(timeout=10) as client: - await client.post(f"{_COOKBOOK_BASE}/api/cookbook/state", + await client.post(f"{_INTERNAL_BASE}/api/cookbook/state", json=state, headers=headers) except Exception as e: return {"error": f"could not save cookbook state: {e}", "exit_code": 1} @@ -3707,7 +3716,7 @@ async def do_list_serve_presets(content: str, owner: Optional[str] = None) -> Di import httpx try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = resp.json() or {} except Exception as e: @@ -3755,7 +3764,7 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict: try: async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", + resp = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=_internal_headers()) state = resp.json() or {} except Exception as e: @@ -3799,7 +3808,7 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict: try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/model/serve", + resp = await client.post(f"{_INTERNAL_BASE}/api/model/serve", 
json=payload, headers=_internal_headers()) data = resp.json() if data.get("ok"): @@ -3851,7 +3860,7 @@ async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Di p["platform"] = args["platform"] try: async with httpx.AsyncClient(timeout=60) as client: - resp = await client.get(f"{_COOKBOOK_BASE}/api/model/cached", + resp = await client.get(f"{_INTERNAL_BASE}/api/model/cached", params=p, headers=headers) data = resp.json() ms = data.get("models", []) if isinstance(data, dict) else (data or []) @@ -3871,7 +3880,7 @@ async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Di servers: list = [] try: async with httpx.AsyncClient(timeout=10) as client: - st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + st = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) st_data = st.json() if st.headers.get("content-type", "").startswith("application/json") else {} servers = (st_data.get("env", {}) or {}).get("servers") or [] except Exception as e: @@ -3942,7 +3951,7 @@ async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Di downloaded = [] try: async with httpx.AsyncClient(timeout=10) as client: - st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers) + st = await client.get(f"{_INTERNAL_BASE}/api/cookbook/state", headers=headers) state = st.json() if st.headers.get("content-type", "").startswith("application/json") else {} for t in (state.get("tasks") or []): if not isinstance(t, dict) or t.get("type") != "download": @@ -4013,7 +4022,7 @@ async def do_edit_image(content: str, owner: Optional[str] = None) -> Dict: payload["scale"] = args["scale"] try: async with httpx.AsyncClient(timeout=120) as client: - resp = await client.post(f"http://localhost:7000/api/gallery/{action}", json=payload) + resp = await client.post(f"{_INTERNAL_BASE}/api/gallery/{action}", json=payload) data = resp.json() if data.get("success") or data.get("id"): return 
{"output": f"Image edited ({action}). New image ID: {data.get('id', '?')}", "exit_code": 0} @@ -4038,7 +4047,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict: args = {} action = (args.get("action") or "list").lower() rid = (args.get("id") or args.get("session_id") or args.get("research_id") or "").strip() - data_dir = _Path("data/deep_research") + data_dir = _Path(DEEP_RESEARCH_DIR) # SECURITY: the research id is interpolated straight into a filesystem # path (data/deep_research/.json) for read AND delete. Without this @@ -4129,7 +4138,7 @@ async def do_trigger_research(content: str, owner: Optional[str] = None) -> Dict payload["search_provider"] = args["search_provider"] try: async with httpx.AsyncClient(timeout=30) as client: - resp = await client.post(f"{_COOKBOOK_BASE}/api/research/start", + resp = await client.post(f"{_INTERNAL_BASE}/api/research/start", json=payload, headers=_internal_headers(owner)) if resp.status_code >= 400: return {"error": f"research/start returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1} @@ -4189,7 +4198,7 @@ async def do_resolve_contact(content: str, owner: Optional[str] = None) -> Dict: async with httpx.AsyncClient(timeout=30) as client: # 2. 
Email history (sent/received) try: - resp = await client.get("http://localhost:7000/api/email/resolve-contact", params={"name": name}) + resp = await client.get(f"{_INTERNAL_BASE}/api/email/resolve-contact", params={"name": name}) if resp.status_code == 200: for c in (resp.json().get("contacts") or []): email = (c.get("email") or "").strip().lower() @@ -4283,7 +4292,7 @@ async def do_manage_contact(content: str, owner: Optional[str] = None) -> Dict: def _load_vault_config() -> Dict: """Load Vaultwarden config from data/vault.json.""" from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) if p.exists(): try: return json.loads(p.read_text(encoding="utf-8")) @@ -4437,7 +4446,7 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict: # Save session to vault.json from pathlib import Path - p = Path("data/vault.json") + p = Path(VAULT_FILE) cfg = {} if p.exists(): try: diff --git a/src/tool_index.py b/src/tool_index.py index 6d5f4572e..3f8010801 100644 --- a/src/tool_index.py +++ b/src/tool_index.py @@ -12,6 +12,14 @@ import re import time from typing import Dict, List, Optional, Set +from src.embedding_lanes import ( + LANE_CUSTOM, + LANE_FASTEMBED, + build_embedding_lanes, + dedupe_results, + migrate_legacy_collection, +) + try: import numpy as np except ImportError: @@ -20,34 +28,20 @@ except ImportError: logger = logging.getLogger(__name__) # Tools that are ALWAYS included regardless of retrieval results. -# These are the most commonly needed and should never be missing. +# Keep this deliberately tiny. Domain tools (web, documents, email, +# cookbook/model serving, files, settings, etc.) are injected by retrieval or +# keyword intent so a trivial agent prompt like "test" does not carry every +# domain's schemas and rules. ALWAYS_AVAILABLE = frozenset({ - "bash", "python", "web_search", "web_fetch", - # File tools: read AND write/edit. 
An agent with disk access should always - # be able to change files, not just read them — otherwise a bare "edit X" - # request can miss write_file/edit_file (RAG-only) and the model wrongly - # falls back to edit_document (editor panel). All admin-gated by tool_security. - "read_file", "write_file", "edit_file", - "grep", "glob", "ls", # code-navigation tools (admin-gated by tool_security) - "api_call", # For configured integrations (Miniflux, Gitea, Linkding, etc.) - # The two genuinely AMBIENT cookbook tools — "what's running" and - # "kill it" can be asked any time without prior cookbook context, - # and need to survive typos. The other cookbook tools (downloads, - # presets, serve, cached, servers) are CONTEXTUAL — they fire via - # keyword hints when the user is actually talking about cookbook. - # Keeping the always-on set small leaves room in the ~16-tool - # budget for manage_tasks / manage_calendar / etc. - "list_served_models", "stop_served_model", "tail_serve_output", - # Serving is a core agent capability — keep these always available so - # the router doesn't lose them on phrasings like "servic" / "fire up" / "boot". - "serve_model", "serve_preset", "list_serve_presets", - "list_cached_models", "list_cookbook_servers", - # Fallback when serve_model's allowlist rejects a cmd or when the - # model was launched out-of-band via bash+tmux — without this the - # session is invisible to the cookbook UI even though it's running. - "adopt_served_model", - # Generic API loopback — the catch-all when no named tool fits. - "app_api", + # Memory is ambient — "remember this" can follow any message regardless + # of topic. Without this, RAG drops it and the agent falls back to + # app_api /api/memory/add which fails with 422 on first attempt. + "manage_memory", + # Ask the user a multiple-choice question for a decision/clarification. + # Always reachable so the agent can pause and ask at any point. 
+ "ask_user", + # Write back to the active plan (tick steps done / revise) during execution. + "update_plan", }) # Tools that the Personal Assistant always has access to during scheduled @@ -73,9 +67,9 @@ COLLECTION_NAME = "odysseus_tool_index" # Each tool gets a searchable description that helps retrieval. # These are richer than the system prompt one-liners — they're for embedding. BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { - "bash": "Run shell commands on the server. Install packages, check files, git operations, curl, system info, process management, networking.", - "python": "Execute Python code for computation, data processing, math, scripting, parsing, API calls. Not for writing code for the user.", - "web_search": "Quick single web lookup for a fact, current event, or doc mid-task. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", + "bash": "Run shell commands on the server. Install packages, check files, git operations, system info, and process management. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "python": "Execute Python code for computation, data processing, math, scripting, and parsing. Not for writing code for the user. Do not use for web lookup/search; use web_search or web_fetch when web tools are available.", + "web_search": "Quick single web lookup for a fact, current event, latest/current information, or doc mid-task. Use this instead of bash/curl/python/requests for web searches. NOT for 'research X' / 'do research on X' requests — those are deep-research jobs (use trigger_research). web_search = one query; trigger_research = a full researched report in the sidebar.", "web_fetch": "Fetch and read the text content of a specific URL/website the user names (e.g. 'check example.com', 'open this link'). 
Use when you have a concrete URL; for open-ended lookups use web_search instead.", "read_file": "Read a file from disk and return its contents. View source code, config files, logs. Supports an optional line range (offset/limit) for large files.", "grep": "Search file CONTENTS for a regex across a directory tree (ripgrep-backed, honours .gitignore). Returns file:line:match. Use to find where code/symbols/strings live — prefer over bash grep.", @@ -106,7 +100,9 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "create_session": "Create a new chat with a name and model.", "list_sessions": "List all chats with their metadata (the UI calls these 'chats'). Use for 'list my chats', 'rename all my chats' (list first, then manage_session to rename each).", "send_to_session": "Send a message to another chat. Cross-chat communication.", - "search_chats": "Search through chat history across all sessions.", + "search_chats": "Search past session transcripts across chats.", + "ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.", + "update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. 
No effect when there is no active plan.", "ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel `. Use `open_email_reply reply` to open an email reply draft document without sending. Also switches between chat/agent modes, changes the current model, and applies/creates themes.", "list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.", "list_emails": "List emails for a folder/account, newest first, including read messages by default. Shows subject, sender, date, UID, account, and AI summary. Check inbox, find emails needing replies. Supports account from list_email_accounts for Gmail/work/custom mailboxes. For last/latest/newest email, use max_results=1 and unread_only=false.", @@ -134,7 +130,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "serve_preset": "Launch a saved Cookbook serve preset by name. Reuses the exact tmux command + host the user already saved. Use for 'run stable diffusion 3.5', 'serve vllm-qwen', 'start the inpaint model' — preset-name matches the user's UI labels.", "adopt_served_model": "Register an existing tmux model server (one started manually or outside the cookbook flow) into Cookbook tracking AND add it as a chat endpoint. Use when the user (or a previous turn) launched something via ssh+tmux and now wants it visible in the UI, stoppable via stop_served_model, and usable in the model picker.", "list_cookbook_servers": "List the cookbook's configured servers (remote GPU boxes + local) and which is the current default. 
Use this BEFORE download_model/serve_model when the user didn't name a host — to decide where to run, or to ask the user which server when ambiguous. Downloads/serves default to the cookbook's selected server, NOT localhost.", - "app_api": "Generic loopback to ANY Odysseus internal endpoint. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — every UI button hits some /api/* endpoint and you can hit it too. action='endpoints' with filter= lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.", + "app_api": "Generic loopback to allowed Odysseus internal endpoints. Use this when the user wants something the UI can do but there's no named tool for it. Covers calendar, gallery, library/documents, memory, notes, tasks, settings, research, compare, cookbook GPUs/state — allowed UI buttons hit /api/* endpoints and you can hit them too. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked; do NOT use app_api for shell commands, package installs, engine rebuilds, or PID signalling. Use named command tooling for shell commands. action='endpoints' with filter= lists available endpoints. action='call' takes method+path+body. Hits same routes the UI uses — auth flows free. 
NOTE: themes are NOT an API endpoint — use the ui_control tool (create_theme / set_theme), not app_api. SESSIONS/CHATS: do NOT use app_api for these — GET /api/sessions returns EMPTY for tool calls (it's owner-filtered and tool calls authenticate as a different identity). EMAIL ACCOUNTS: do NOT use /api/email/accounts via app_api; use list_email_accounts, list_emails, and read_email instead. To list/rename/archive/delete/fork chats use the list_sessions and manage_session tools instead.", "edit_image": "Edit an image in the gallery: upscale (increase resolution), remove background (rembg), inpaint (fill selected area), or harmonize (blend edits). Specify image ID and action.", "trigger_research": "Start a deep research job on any topic — appears in the Deep Research sidebar, streams progress, produces a detailed report. Use for 'research X', 'look into Y', 'do deep research on Z', 'investigate'. NOT a scheduled task — it runs now and surfaces in the sidebar.", } @@ -144,32 +140,30 @@ class ToolIndex: """ChromaDB-backed tool index for RAG-based tool selection.""" def __init__(self): - from src.chroma_client import get_chroma_client - from src.embeddings import get_embedding_client - - self._embedder = get_embedding_client() - if not self._embedder: - raise RuntimeError("No embedding client available") - - client = get_chroma_client() - self._collection = client.get_or_create_collection( - name=COLLECTION_NAME, - metadata={"hnsw:space": "cosine"}, + self._lanes = build_embedding_lanes(COLLECTION_NAME) + if not self._lanes: + raise RuntimeError("No embedding lanes available") + self._embedder = self._lanes[0].client + self._collection = next( + (lane.collection for lane in self._lanes if lane.name == LANE_FASTEMBED), + self._lanes[0].collection, ) + migrate_legacy_collection(COLLECTION_NAME, self._lanes) self._fingerprint = "" self._mcp_generation = -1 self._healthy = True - logger.info("ToolIndex initialized") + logger.info("ToolIndex initialized (lanes=%s)", 
[lane.name for lane in self._lanes]) @property def healthy(self): return self._healthy def _embed(self, texts: List[str]) -> List[List[float]]: - vecs = self._embedder.encode(texts, normalize_embeddings=True) + if not self._lanes: + return [] + vecs = self._lanes[0].encode(texts) if np is not None: return np.array(vecs, dtype=np.float32).tolist() - # Fallback without numpy return [list(v) for v in vecs] def index_builtin_tools(self): @@ -190,23 +184,31 @@ class ToolIndex: # registry (e.g. removed tools like the old vault_* set). # Without this, upsert leaves them in place and RAG keeps # surfacing tools that no longer exist. - try: - existing = self._collection.get(where={"tool_type": "builtin"}) - existing_ids = (existing or {}).get("ids") or [] - stale = [i for i in existing_ids if i not in set(ids)] - if stale: - self._collection.delete(ids=stale) - logger.info(f"Pruned {len(stale)} stale builtin tool entries from index") - except Exception as e: - logger.debug(f"Stale-pruning skipped: {e}") + indexed = False + for lane in self._lanes: + try: + existing = lane.collection.get(where={"tool_type": "builtin"}) + existing_ids = (existing or {}).get("ids") or [] + stale = [i for i in existing_ids if i not in set(ids)] + if stale: + lane.collection.delete(ids=stale) + logger.info(f"Pruned {len(stale)} stale builtin tool entries from {lane.name} index") + except Exception as e: + logger.debug(f"Stale-pruning skipped for {lane.name}: {e}") - embeddings = self._embed(docs) - self._collection.upsert( - ids=ids, - documents=docs, - embeddings=embeddings, - metadatas=metadatas, - ) + try: + lane.collection.upsert( + ids=ids, + documents=docs, + embeddings=lane.encode(docs), + metadatas=metadatas, + ) + indexed = True + except Exception as e: + logger.warning("Builtin tool indexing failed in %s lane: %s", lane.name, e) + if not indexed: + self._healthy = False + raise RuntimeError("Builtin tool indexing failed in all embedding lanes") self._fingerprint = hashlib.sha256( 
",".join(sorted(BUILTIN_TOOL_DESCRIPTIONS.keys())).encode() ).hexdigest() @@ -221,15 +223,15 @@ class ToolIndex: gen = getattr(mcp_mgr, '_generation', 0) if gen == self._mcp_generation: return - self._mcp_generation = gen # Remove old MCP entries - try: - existing = self._collection.get(where={"tool_type": "mcp"}) - if existing and existing["ids"]: - self._collection.delete(ids=existing["ids"]) - except Exception: - pass + for lane in self._lanes: + try: + existing = lane.collection.get(where={"tool_type": "mcp"}) + if existing and existing["ids"]: + lane.collection.delete(ids=existing["ids"]) + except Exception: + pass # Get current MCP tools try: @@ -238,6 +240,7 @@ class ToolIndex: all_tools = "" if not all_tools: + self._mcp_generation = gen return # Parse MCP tool descriptions from the prompt text @@ -265,39 +268,59 @@ class ToolIndex: metadatas.append({"tool_name": name, "tool_type": "mcp"}) if not docs: + self._mcp_generation = gen return - embeddings = self._embed(docs) - self._collection.upsert( - ids=ids, - documents=docs, - embeddings=embeddings, - metadatas=metadatas, - ) + indexed = False + for lane in self._lanes: + try: + lane.collection.upsert( + ids=ids, + documents=docs, + embeddings=lane.encode(docs), + metadatas=metadatas, + ) + indexed = True + except Exception as e: + logger.warning("MCP tool indexing failed in %s lane: %s", lane.name, e) + if not indexed: + logger.warning("MCP tool indexing failed in all embedding lanes") + return + self._mcp_generation = gen logger.info(f"Indexed {len(docs)} MCP tools") def retrieve(self, query: str, k: int = 8) -> List[str]: """Retrieve the top-K most relevant tool names for a query.""" - try: - query_embedding = self._embed([query]) - results = self._collection.query( - query_embeddings=query_embedding, - n_results=min(k, self._collection.count() or k), - include=["metadatas", "distances"], - ) - if not results or not results.get("metadatas"): - return [] - - tool_names = [] - for meta_list in 
results["metadatas"]: - for meta in meta_list: - name = meta.get("tool_name", "") - if name and name not in tool_names: - tool_names.append(name) - return tool_names - except Exception as e: - logger.warning(f"Tool retrieval failed: {e}") - return [] + rows = [] + lane_priority = {LANE_CUSTOM: 0, LANE_FASTEMBED: 1} + for lane in self._lanes: + try: + count = lane.count() + if count == 0: + continue + results = lane.collection.query( + query_embeddings=lane.encode([query]), + n_results=min(k, count), + include=["metadatas", "distances"], + ) + if not results or not results.get("metadatas"): + continue + distances = results.get("distances") or [] + for list_idx, meta_list in enumerate(results["metadatas"]): + distance_list = distances[list_idx] if list_idx < len(distances) else [] + for idx, meta in enumerate(meta_list): + name = meta.get("tool_name", "") + if name: + distance = distance_list[idx] if idx < len(distance_list) else 1.0 + rows.append({ + "tool_name": name, + "score": round(1.0 - distance, 4), + "embedding_lane": lane.name, + }) + except Exception as e: + logger.warning("Tool retrieval failed in %s lane: %s", lane.name, e) + rows.sort(key=lambda row: (-row["score"], lane_priority.get(row["embedding_lane"], 99))) + return [row["tool_name"] for row in dedupe_results(rows, id_key="tool_name", limit=k)] # Structural recurring-schedule intent. Typo-resilient (matches "every dya" # via "every "), and catches bare clock times ("at 7:30 am", "7am"). @@ -316,7 +339,7 @@ class ToolIndex: # request (e.g. "visit and tell me the title"), force-including the # whole email toolset and crowding out the relevant tools — the model then # believed it had only email tools and refused web/other tasks (#1707). 
- frozenset({"email", "mail", "gmail", "googlemail", "message", "send", "reply", "inbox", "unread"}): + frozenset({"email", "emails", "mail", "mails", "gmail", "googlemail", "message", "messages", "send", "reply", "replies", "inbox", "unread"}): {"list_email_accounts", "list_emails", "read_email", "send_email", "reply_to_email", "bulk_email", "delete_email", "archive_email", "mark_email_read", "resolve_contact", "ui_control"}, frozenset({"calendar", "event", "meeting", "schedule", "appointment"}): {"manage_calendar"}, @@ -380,14 +403,14 @@ class ToolIndex: # Document edit/update intent frozenset({"edit", "change", "fix", "rewrite", "update", "replace", "add a", "tweak", "modify", "rename", "paragraph", - "section", "line", "the doc", "the document", "in the doc"}): + "section", "line", "the doc", "the docs", "the document", "the documents", "in the doc", "in the docs", "in document"}): {"edit_document", "update_document", "create_document", "suggest_document"}, # Document deletion / management — include generic open/find/read/show # verbs + file/doc synonyms so "open my ", "find the ", "delete # " reach manage_documents even without the literal word "document". 
frozenset({"delete this doc", "delete the doc", "delete document", - "remove document", "remove the doc", "trash", "list documents", - "list docs", "all my docs", "my documents", "my docs", "my files", + "remove document", "remove the doc", "trash", "list document", "list documents", + "list doc", "list docs", "all my docs", "my document", "my documents", "my doc", "my docs", "my files", "open the", "open my", "open document", "open doc", "find the", "find my", "find document", "read the", "read my", "show me the", "show my", "the file", "my file", "the report", "the write-up", @@ -500,3 +523,10 @@ def get_tool_index() -> Optional[ToolIndex]: logger.warning(f"ToolIndex init failed (will retry in {_RETRY_INTERVAL}s): {e}") _tool_index = None return None + + +def reset_tool_index() -> None: + """Clear the singleton so embedding endpoint changes rebuild tool lanes.""" + global _tool_index, _last_attempt + _tool_index = None + _last_attempt = 0.0 diff --git a/src/tool_parsing.py b/src/tool_parsing.py index b31e114f9..3f296c2e6 100644 --- a/src/tool_parsing.py +++ b/src/tool_parsing.py @@ -5,9 +5,10 @@ Regex-based parsing of tool invocations from LLM response text. Supports fenced code blocks, [TOOL_CALL] blocks, and XML-style blocks. 
""" -import re +import ast import json import logging +import re from typing import List, Optional from src.agent_tools import ToolBlock, TOOL_TAGS @@ -176,11 +177,108 @@ _TOOL_NAME_MAP = { "todos": "manage_notes", } +_MISFENCED_WEB_TOOL_NAMES = { + "web_search": "web_search", + "websearch": "web_search", + "google_search": "web_search", + "google_search_retrieval": "web_search", + "google_search_grounding": "web_search", + "web_fetch": "web_fetch", + "webfetch": "web_fetch", + "fetch_url": "web_fetch", +} + # --------------------------------------------------------------------------- # Parsing functions # --------------------------------------------------------------------------- +def _literal_string(value) -> Optional[str]: + """Return a string from a small literal AST node, or None.""" + try: + parsed = ast.literal_eval(value) + except (ValueError, SyntaxError, TypeError): + return None + if isinstance(parsed, str): + return parsed.strip() + if isinstance(parsed, list): + for item in parsed: + if isinstance(item, str) and item.strip(): + return item.strip() + return None + + +def _parse_misfenced_web_lookup(content: str) -> Optional[ToolBlock]: + """Recover simple web_search/web_fetch calls wrapped in python/bash fences. + + Some local fenced-tool models write: + + ```python + web_search("latest python release") + ``` + + That is an intended tool call, not Python code. Keep this intentionally + narrow: only a single bare function call to a known web tool alias converts. 
+ """ + try: + module = ast.parse(content.strip(), mode="exec") + except SyntaxError: + return None + if len(module.body) != 1 or not isinstance(module.body[0], ast.Expr): + return None + call = module.body[0].value + if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name): + return None + + mapped = _MISFENCED_WEB_TOOL_NAMES.get(call.func.id.lower()) + if mapped not in ("web_search", "web_fetch"): + return None + if len(call.args) > 1: + return None + + args = {} + if call.args: + key = "url" if mapped == "web_fetch" else "query" + value = _literal_string(call.args[0]) + if not value: + return None + args[key] = value + + allowed = {"query", "queries", "url", "time_filter", "freshness", "max_pages"} + for keyword in call.keywords: + if keyword.arg not in allowed: + return None + key = "query" if keyword.arg == "queries" else keyword.arg + value = _literal_string(keyword.value) + if value is not None: + args[key] = value + continue + try: + parsed = ast.literal_eval(keyword.value) + except (ValueError, SyntaxError, TypeError): + return None + if key == "max_pages" and isinstance(parsed, int): + args[key] = parsed + continue + return None + + if mapped == "web_search": + query = args.get("query") + if not query: + return None + payload = {"query": query} + for key in ("time_filter", "freshness", "max_pages"): + if key in args: + payload[key] = args[key] + if len(payload) == 1: + return ToolBlock("web_search", query) + return ToolBlock("web_search", json.dumps(payload)) + + url = args.get("url") + if not url: + return None + return ToolBlock("web_fetch", url) + def _parse_tool_call_block(raw: str) -> Optional[ToolBlock]: """Parse a [TOOL_CALL] block into a ToolBlock. @@ -329,7 +427,7 @@ def _parse_tool_code_block(raw: str) -> Optional[ToolBlock]: return None -def parse_tool_blocks(text: str) -> List[ToolBlock]: +def parse_tool_blocks(text: str, skip_fenced: bool = False) -> List[ToolBlock]: """Extract executable tool blocks from LLM response text. 
Supports multiple formats: @@ -338,6 +436,17 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: 3. XML-style / blocks 4. blocks (MiniMax-M2.5 style) 5. DeepSeek DSML markup (normalized to first) + + `skip_fenced`: when True, Pattern 1 (fenced ```bash/```python/```json code + blocks) is not matched at all. Native function-calling models (GPT/Claude/ + Grok/Qwen3/DeepSeek-V, etc.) commonly write illustrative fenced examples in + prose; for those models we trust the structured tool_calls channel for real + invocations and treat a bare fence as display text rather than an action + (issue #3222). Patterns 2-5 — explicit [TOOL_CALL]///DSML + markup that leaked into content as text — stay fully active regardless, + since that markup is never an illustrative example and dropping it would + silently lose real calls (e.g. DeepSeek-V falling back to DSML when it + can't emit structured tool_calls). """ blocks = [] @@ -345,24 +454,31 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: # XML patterns below catch it. text = _normalize_dsml(text) - # Pattern 1: fenced code blocks - for m in _TOOL_BLOCK_RE.finditer(text): - tag = m.group(1).lower() - content = m.group(2).strip() - if not content: - continue - # If a code block's content is an XML call (some models wrap - # tool calls in ```python or ```xml fences), parse the invoke instead. - if ' XML call (some models wrap + # tool calls in ```python or ```xml fences), parse the invoke instead. + if ' markup, not literal code. Whether or + # not any call converted, never fall through to append the raw XML as + # a python/bash block — e.g. a hyphenated/namespaced tool name that + # _XML_INVOKE_RE's \w+ can't match would otherwise be executed as code. 
+ continue + if tag in ("python", "bash"): + block = _parse_misfenced_web_lookup(content) if block: blocks.append(block) - invoked = True - if invoked: - continue - blocks.append(ToolBlock(tag, content)) + continue + blocks.append(ToolBlock(tag, content)) # Pattern 2: [TOOL_CALL] blocks (only if no fenced blocks found) if not blocks: @@ -396,12 +512,23 @@ def parse_tool_blocks(text: str) -> List[ToolBlock]: return blocks -def strip_tool_blocks(text: str) -> str: - """Remove executable tool blocks from text for clean display.""" +def strip_tool_blocks(text: str, skip_fenced: bool = False) -> str: + """Remove executable tool blocks from text for clean display. + + `skip_fenced`: when True, fenced ```bash/```python/```json code blocks + (Pattern 1) are left intact instead of being stripped. This must mirror + whatever `skip_fenced` value `parse_tool_blocks` was called with for the + same response: if a fence wasn't executed as a tool call (because it's an + illustrative example from a native function-calling model), it shouldn't + vanish from the persisted/displayed text either — otherwise the example + streams once and then disappears on reload (issue #3222 follow-up). + Patterns 2-5 + DSML markup are always stripped, since that markup should + never reach the user regardless of whether it converted to a tool call. + """ # Normalize DSML first so its markup gets stripped by the # / removers below instead of leaking to the user. 
text = _normalize_dsml(text) - cleaned = _TOOL_BLOCK_RE.sub('', text) + cleaned = text if skip_fenced else _TOOL_BLOCK_RE.sub('', text) cleaned = _TOOL_CALL_RE.sub('', cleaned) cleaned = _XML_TOOL_CALL_RE.sub('', cleaned) cleaned = _TOOL_CODE_RE.sub('', cleaned) diff --git a/src/tool_policy.py b/src/tool_policy.py new file mode 100644 index 000000000..b70b5c3be --- /dev/null +++ b/src/tool_policy.py @@ -0,0 +1,209 @@ +"""Per-turn tool policy composition for agent execution.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from types import MappingProxyType +from typing import Iterable, Mapping, Optional, Set, Tuple + + +GUIDE_ONLY_DIRECTIVE = ( + "## GUIDE-ONLY MODE - TOOL POLICY\n" + "The latest user turn explicitly forbids tool use. Do not call tools, do not " + "run shell commands, and do not inspect local files or the environment. " + "Respond in normal text by guiding the user or asking them to paste the " + "output they will produce locally." 
# Static fallback set of native tool names. known_tool_names() starts from this
# set and then adds whatever the schema / section / plan-mode registries expose,
# so these names are still covered when those imports fail.
_COMMON_TOOL_NAMES = {
    "api_call",
    "app_api",
    "archive_email",
    "ask_teacher",
    "ask_user",
    "bash",
    "bulk_email",
    "builtin_browser",
    "cancel_download",
    "chat_with_model",
    "create_document",
    "create_session",
    "delete_email",
    "download_model",
    "edit_document",
    "edit_file",
    "edit_image",
    "generate_image",
    "glob",
    "grep",
    "list_cached_models",
    "list_cookbook_servers",
    "list_downloads",
    "list_emails",
    "list_models",
    "list_serve_presets",
    "list_served_models",
    "list_sessions",
    "ls",
    "manage_calendar",
    "manage_contact",
    "manage_documents",
    "manage_endpoints",
    "manage_mcp",
    "manage_memory",
    "manage_notes",
    "manage_research",
    "manage_session",
    "manage_settings",
    "manage_skills",
    "manage_tasks",
    "manage_tokens",
    "manage_webhooks",
    "mark_email_read",
    "pipeline",
    "python",
    "read_email",
    "read_file",
    "reply_to_email",
    "resolve_contact",
    "search_chats",
    "search_hf_models",
    "send_email",
    "send_to_session",
    "serve_model",
    "serve_preset",
    "stop_served_model",
    "suggest_document",
    "trigger_research",
    "ui_control",
    "update_document",
    "update_plan",
    "vault_get",
    "vault_search",
    "vault_unlock",
    "web_fetch",
    "web_search",
    "write_file",
}


# (compiled pattern, reason) pairs, checked in order; the first match wins.
# All patterns are case-insensitive and are applied to whitespace-collapsed,
# apostrophe-normalized text (see detect_guide_only_turn).
_GUIDE_ONLY_PATTERNS: Tuple[Tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(pattern, re.IGNORECASE), reason)
    for pattern, reason in (
        (r"\bguide[-\s]?only mode\b", "guide-only mode requested"),
        (r"\bno[-\s]?tools? mode\b", "no-tools mode requested"),
        (r"\bdo not use (?:any )?tools?\b", "user forbade tool use"),
        (r"\bdon'?t use (?:any )?tools?\b", "user forbade tool use"),
        (r"\bnot allowed to use (?:any )?tools?\b", "user forbade tool use"),
        (r"\bnot allowed to:?.{0,120}\buse (?:any )?tools?\b", "user forbade tool use"),
        (r"\bask (?:me )?(?:for confirmation )?before using tools?\b", "user requested confirmation before tools"),
    )
)


# Typographic apostrophes that keyboards with "smart punctuation" emit instead
# of the ASCII "'". They are folded to "'" before pattern matching so that
# "don’t use tools" is recognized exactly like "don't use tools".
_APOSTROPHE_VARIANTS = str.maketrans(
    {
        "\u2018": "'",  # left single quotation mark
        "\u2019": "'",  # right single quotation mark
        "\u02bc": "'",  # modifier letter apostrophe
        "\uff07": "'",  # fullwidth apostrophe
    }
)


@dataclass(frozen=True)
class ToolPolicy:
    """Effective tool behavior for one agent turn."""

    # Tool names that must not execute this turn.
    disabled_tools: frozenset[str] = frozenset()
    # Tool names that are additionally treated as blocked (see blocks()).
    hidden_tools: frozenset[str] = frozenset()
    # Per-tool explanation returned by reason_for().
    reasons: Mapping[str, str] = field(default_factory=dict)
    # "normal" by default; build_effective_tool_policy sets "guide_only".
    mode: str = "normal"
    # When True, blocks() reports every non-empty tool name as blocked.
    block_all_tool_calls: bool = False
    # Set together with block_all_tool_calls for guide-only turns.
    disable_mcp: bool = False

    def all_disabled_names(self) -> Set[str]:
        """Return a new set holding both the disabled and the hidden tool names."""
        return set(self.disabled_tools) | set(self.hidden_tools)

    def blocks(self, tool_name: Optional[str]) -> bool:
        """Return True when ``tool_name`` must not run under this policy.

        An empty or ``None`` name is never reported as blocked.
        """
        if not tool_name:
            return False
        return self.block_all_tool_calls or tool_name in self.disabled_tools or tool_name in self.hidden_tools

    def reason_for(self, tool_name: Optional[str]) -> str:
        """Return the explanation for why ``tool_name`` is blocked.

        A tool-specific reason wins; otherwise a generic message is returned,
        with a dedicated wording for guide-only turns.
        """
        if tool_name and tool_name in self.reasons:
            return self.reasons[tool_name]
        if self.block_all_tool_calls and self.mode == "guide_only":
            return "Tool use is disabled for this guide-only turn."
        return "Tool use is disabled for this turn."


def detect_guide_only_turn(message: object) -> Optional[str]:
    """Return a reason when the latest user turn strongly requests no tools.

    Args:
        message: The latest user message. Anything that is not a non-blank
            string yields ``None``.

    Returns:
        The reason string of the first matching guide-only pattern, or ``None``
        when no pattern matches.
    """

    if not isinstance(message, str) or not message.strip():
        return None
    # Collapse whitespace runs so multi-space / multi-line phrasing still
    # matches the single-space patterns.
    text = re.sub(r"\s+", " ", message.strip())
    # Fold typographic apostrophes to ASCII; otherwise "don’t use tools" would
    # slip past the "don'?t" pattern and the turn would not be enforced.
    text = text.translate(_APOSTROPHE_VARIANTS)
    for pattern, reason in _GUIDE_ONLY_PATTERNS:
        if pattern.search(text):
            return reason
    return None


def known_tool_names() -> Set[str]:
    """Best-effort set of native tool names for prompt hiding and denylisting.

    Starts from the static ``_COMMON_TOOL_NAMES`` and adds names from each
    optional registry. Every registry lookup is deliberately best-effort: a
    failing import (or malformed entry) skips that source instead of raising,
    so the static names are always returned.
    """

    names = set(_COMMON_TOOL_NAMES)
    try:
        from src.tool_schemas import FUNCTION_TOOL_SCHEMAS

        for schema in FUNCTION_TOOL_SCHEMAS:
            # Accept both the nested {"function": {"name": ...}} shape and a
            # flat {"name": ...} shape.
            name = (schema.get("function") or {}).get("name") or schema.get("name")
            if name:
                names.add(name)
    except Exception:
        pass
    try:
        from src.agent_loop import TOOL_SECTIONS

        names.update(TOOL_SECTIONS.keys())
    except Exception:
        pass
    try:
        from src.tool_security import PLAN_MODE_READONLY_TOOLS, _PLAN_MODE_KNOWN_MUTATORS

        names.update(PLAN_MODE_READONLY_TOOLS)
        names.update(_PLAN_MODE_KNOWN_MUTATORS)
    except Exception:
        pass
    return names


def build_effective_tool_policy(
    *,
    disabled_tools: Optional[Iterable[str]] = None,
    last_user_message: object = "",
) -> ToolPolicy:
    """Compose the effective policy for one agent turn.

    Existing callers still provide the already-composed disabled-tool denylist.
    This function adds higher-level turn policy on top so enforcement is not
    delegated to prompt compliance.

    Args:
        disabled_tools: Tool names already disabled for the request; falsy
            entries are dropped and the rest are coerced to ``str``.
        last_user_message: The latest user message, inspected with
            ``detect_guide_only_turn``.

    Returns:
        A ``ToolPolicy``. When the message requests guide-only behavior, every
        known tool is disabled and hidden, all tool calls are blocked and MCP is
        disabled; otherwise only the supplied denylist applies.
    """

    disabled = {str(t) for t in (disabled_tools or []) if t}
    hidden: Set[str] = set()
    reasons = {tool: "Tool is disabled for this request." for tool in disabled}

    guide_reason = detect_guide_only_turn(last_user_message)
    if guide_reason:
        all_tools = known_tool_names()
        disabled.update(all_tools)
        hidden.update(all_tools)
        # The guide-only reason replaces the generic one for every known tool.
        reasons.update({tool: f"{guide_reason}." for tool in all_tools})
        return ToolPolicy(
            disabled_tools=frozenset(disabled),
            hidden_tools=frozenset(hidden),
            # Read-only view so the frozen policy cannot be mutated via reasons.
            reasons=MappingProxyType(dict(reasons)),
            mode="guide_only",
            block_all_tool_calls=True,
            disable_mcp=True,
        )

    return ToolPolicy(
        disabled_tools=frozenset(disabled),
        hidden_tools=frozenset(hidden),
        reasons=MappingProxyType(dict(reasons)),
    )
When a user asks for ANY theme not in the preset list, ALWAYS use create_theme.", + "description": "Control the user interface. Actions: toggle (turn tools on/off), open_panel (open a modal: documents/library, gallery, email, sessions, notes, memories/brain, skills, settings, cookbook), open_email_reply (open an email reply draft document; does NOT send), set_mode, switch_model, set_theme (built-in presets: dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute), create_theme (CREATE any custom theme with a name + colors object — pick distinctive, evocative hex colors that match the requested aesthetic, NOT generic defaults. The theme auto-applies after creation). When a user asks for ANY theme not in the built-in preset list, ALWAYS use create_theme.", "parameters": { "type": "object", "properties": { @@ -447,6 +447,47 @@ FUNCTION_TOOL_SCHEMAS = [ } } }, + { + "type": "function", + "function": { + "name": "ask_user", + "description": "Ask the user a multiple-choice question to get a decision or clarification when the task is genuinely ambiguous and the answer changes what you do next (e.g. pick between approaches, confirm an assumption, choose a target). The user sees clickable option buttons; calling this ENDS your turn and their selection arrives as your next message. Prefer sensible defaults over asking — only ask when you truly cannot proceed well without the user's input. Do NOT use it to confirm irreversible/destructive actions that have a dedicated confirmation flow.", + "parameters": { + "type": "object", + "properties": { + "question": {"type": "string", "description": "The question to ask. Be specific and self-contained."}, + "options": { + "type": "array", + "description": "2-6 mutually exclusive choices. 
Each is an object with a short `label` and an optional `description` explaining the trade-off.", + "items": { + "type": "object", + "properties": { + "label": {"type": "string", "description": "Concise choice text the user clicks (1-5 words)."}, + "description": {"type": "string", "description": "Optional one-line explanation of this choice."} + }, + "required": ["label"] + } + }, + "multi": {"type": "boolean", "description": "Set true to let the user select multiple options instead of one. Default false."} + }, + "required": ["question", "options"] + } + } + }, + { + "type": "function", + "function": { + "name": "update_plan", + "description": "Write back to the ACTIVE PLAN: mark steps done or revise them. Use this while executing an approved plan — after you finish a step, call update_plan with the full checklist and that step marked `- [x]`; when the user asks to change the plan, call it with the revised checklist. The user's docked plan window updates live. Pass the COMPLETE checklist every time (not a diff). No effect if there is no active plan.", + "parameters": { + "type": "object", + "properties": { + "plan": {"type": "string", "description": "The full updated plan as a GitHub-style markdown checklist — one step per line, `- [ ]` for pending and `- [x]` for done. 
Always send the whole list."} + }, + "required": ["plan"] + } + } + }, { "type": "function", "function": { @@ -504,8 +545,8 @@ FUNCTION_TOOL_SCHEMAS = [ "uid": {"type": "string", "description": "Event UID (for update/delete)"}, "calendar_href": {"type": "string", "description": "Specific calendar URL (optional; defaults to first calendar)"}, "calendar": {"type": "string", "description": "Filter list_events by calendar name or href"}, - "start": {"type": "string", "description": "list_events range start (ISO datetime); defaults to today"}, - "end": {"type": "string", "description": "list_events range end (ISO datetime); defaults to +14 days"}, + "start": {"type": "string", "description": "list_events range start (ISO datetime); defaults to today. Prefer start; backend also accepts start_date, range_start, from, dtstart, since."}, + "end": {"type": "string", "description": "list_events range end (ISO datetime); defaults to +14 days. Prefer end; backend also accepts end_date, range_end, to, dtend, until."}, "event_type": {"type": "string", "description": "Tag / category for the event. Common values: work, personal, health, travel, meal, social, admin, other. Aliases accepted: tag, category, type."}, "importance": {"type": "string", "enum": ["low", "normal", "high", "critical"], "description": "Priority level (defaults to 'normal')"}, "reminder_minutes": {"type": "integer", "description": "For create_event: create an Odysseus reminder this many minutes before the event, e.g. 5 for 'reminder 5 min before'."}, @@ -909,7 +950,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "app_api", - "description": "Generic loopback to ANY internal Odysseus endpoint. Use this when there's no named tool for what the user wants. Hits the same routes the UI buttons hit (cookbook, gallery, library/documents, memory, notes, calendar, tasks, settings, themes, research, compare, etc.). action='endpoints' returns the OpenAPI surface (use `filter` to narrow). 
action='call' (default) takes method+path+body. Auth/user/admin paths are blocked for safety. Do not use for email account discovery; use list_email_accounts instead because /api/email/accounts is owner-filtered in tool context.", + "description": "Generic loopback to allowed internal Odysseus endpoints. Use this when there's no named tool for what the user wants. Hits the same routes the UI buttons hit (cookbook, gallery, library/documents, memory, notes, calendar, tasks, settings, themes, research, compare, etc.). action='endpoints' returns the OpenAPI surface (use `filter` to narrow). action='call' (default) takes method+path+body. Sensitive auth/user/admin/shell paths and host-control Cookbook mutation routes are blocked for safety. Do not use for shell commands; use named command tooling instead. Do not use for package installs, engine rebuilds, PID signalling, or email account discovery; use list_email_accounts for email accounts because /api/email/accounts is owner-filtered in tool context.", "parameters": { "type": "object", "properties": { @@ -1191,6 +1232,12 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock content = str(queries) else: content = args.get("query", "") + # Preserve the model-requested freshness filter — the web_search schema + # advertises time_filter and the executor parses {"query","time_filter"}, + # but a bare query string dropped it. Mirrors the read_file JSON idiom. + tf = args.get("time_filter") + if content and isinstance(tf, str) and tf in ("day", "week", "month", "year"): + content = json.dumps({"query": content, "time_filter": tf}) elif tool_type == "read_file": # Plain path (back-compat) unless a line range is requested → JSON. 
if args.get("offset") or args.get("limit"): @@ -1211,14 +1258,24 @@ def function_call_to_tool_block(name: str, arguments: str) -> Optional[ToolBlock content = "\n".join(parts) elif tool_type == "edit_document": blocks = [] - for edit in args.get("edits", []): + edits = args.get("edits", []) + if not isinstance(edits, list): + edits = [] + for edit in edits: + if not isinstance(edit, dict): + continue blocks.append( f'<<>>\n{edit.get("find", "")}\n<<>>\n{edit.get("replace", "")}\n<<>>' ) content = "\n".join(blocks) elif tool_type == "suggest_document": blocks = [] - for s in args.get("suggestions", []): + suggestions = args.get("suggestions", []) + if not isinstance(suggestions, list): + suggestions = [] + for s in suggestions: + if not isinstance(s, dict): + continue blocks.append( f'<<>>\n{s.get("find", "")}\n<<>>\n{s.get("replace", "")}\n<<>>\n{s.get("reason", "")}\n<<>>' ) diff --git a/src/tool_security.py b/src/tool_security.py index 8ffa50f9b..82d2c3d67 100644 --- a/src/tool_security.py +++ b/src/tool_security.py @@ -51,6 +51,101 @@ NON_ADMIN_BLOCKED_TOOLS = { } +# Plan mode: the agent may investigate but must not mutate anything. Only these +# read-only/inspection tools stay enabled; everything else (writes, sends, +# manage_*, model serving, MCP, etc.) is blocked. Allowlist rather than blocklist +# so any newly added tool defaults to BLOCKED in plan mode — fail safe. +# +# bash/python are deliberately NOT here: the shell can mutate (write files, hit +# the network) and can't be constrained to read-only at the tool layer, so plan +# mode blocks it outright rather than relying on a prompt to keep it well-behaved. +# Code/file discovery is covered by the dedicated read-only tools below +# (read_file, grep, glob, ls) instead of freestyle shell. 
+PLAN_MODE_READONLY_TOOLS = { + "read_file", + "grep", + "glob", + "ls", + "web_search", + "web_fetch", + "search_chats", + "list_models", + "list_sessions", + "list_emails", + "read_email", + "list_served_models", + "list_downloads", + "list_cached_models", + "search_hf_models", + "list_serve_presets", + "list_cookbook_servers", + "resolve_contact", + "chat_with_model", + "ask_teacher", +} + + +# The agent's tool gate is a DENYLIST: execute_tool_block blocks any tool whose +# name is in `disabled_tools`. Plan mode's policy is the opposite — an allowlist +# (PLAN_MODE_READONLY_TOOLS). To apply an allowlist through a denylist, plan mode +# returns the inverse: every known tool name minus the allowlist. +# +# Known tool names come from FUNCTION_TOOL_SCHEMAS, but that source is imperfect: +# some tools are only XML-invocable (e.g. manage_notes, generate_image) and never +# appear there, and the import can fail outright. Either gap would drop a mutating +# tool from the subtraction and silently leave it enabled. This set is the static +# backstop for both: union it in so known mutators are always subtracted, and so a +# failed import still blocks them (fail closed, never open). Only mutators belong +# here — read-only tools are covered by the allowlist. Keep in sync when adding +# new mutating tools. 
# Static backstop of mutating tool names. It is unioned into the plan-mode
# denylist so these tools are blocked even when the schema list cannot be loaded.
_PLAN_MODE_KNOWN_MUTATORS = {
    # Files and documents
    "write_file",
    "create_document",
    "edit_document",
    "update_document",
    "suggest_document",
    "manage_documents",
    # Sessions and pipelines
    "create_session",
    "manage_session",
    "send_to_session",
    "pipeline",
    # manage_* surfaces
    "manage_memory",
    "manage_skills",
    "manage_tasks",
    "manage_notes",
    "manage_endpoints",
    "manage_mcp",
    "manage_webhooks",
    "manage_tokens",
    "manage_settings",
    "manage_contact",
    "manage_calendar",
    # Generic API / UI control
    "api_call",
    "app_api",
    "ui_control",
    # Email
    "send_email",
    "reply_to_email",
    "bulk_email",
    "delete_email",
    "archive_email",
    "mark_email_read",
    # Model download / serving
    "download_model",
    "serve_model",
    "stop_served_model",
    "cancel_download",
    "adopt_served_model",
    "serve_preset",
    # Images and research
    "generate_image",
    "edit_image",
    "trigger_research",
    "manage_research",
    # Shell is never read-only-safe; block it explicitly so it stays out of plan
    # mode even if the schema list fails to load.
    "bash",
    "python",
}


def plan_mode_disabled_tools() -> Set[str]:
    """Compute the tool names plan mode adds to the denylist.

    The result is every known tool name minus PLAN_MODE_READONLY_TOOLS. Known
    names are the function-tool schema names plus _PLAN_MODE_KNOWN_MUTATORS, so
    a failed schema import still leaves the static mutators blocked (fail
    closed). MCP tools are not handled here.

    Returns:
        A new set of tool names to block while plan mode is active.
    """
    schema_names: Set[str] = set()
    try:
        # agent_tools / tool_parsing / tool_schemas import each other; entering
        # through agent_tools first lets the schema import resolve from a cold
        # start (e.g. tests) as well as inside the running app.
        import src.agent_tools  # noqa: F401
        from src.tool_schemas import FUNCTION_TOOL_SCHEMAS

        for entry in FUNCTION_TOOL_SCHEMAS:
            fn_spec = entry.get("function") or {}
            tool_name = fn_spec.get("name")
            if tool_name is not None:
                schema_names.add(tool_name)
    except Exception as exc:
        logger.warning("Unable to load tool schemas for plan-mode gating: %s", exc)
        # Discard any partially collected names; the static backstop still applies.
        schema_names = set()
    # Known names = schema-derived + static mutators; then remove the allowlist.
    blocked = schema_names.union(_PLAN_MODE_KNOWN_MUTATORS)
    blocked.difference_update(PLAN_MODE_READONLY_TOOLS)
    return blocked
+ + Callers treat the result as text, so always return a string: coerce a + non-string (None -> "", otherwise str(...)) instead of returning it raw, + which would just move the crash downstream. + """ + if not isinstance(text, str): + text = "" if text is None else str(text) + if len(text) > limit: + return text[:limit] + f"\n... (truncated, {len(text)} chars total)" + return text diff --git a/src/upload_handler.py b/src/upload_handler.py index bb0cb300f..95bce306d 100644 --- a/src/upload_handler.py +++ b/src/upload_handler.py @@ -12,6 +12,10 @@ import threading from datetime import datetime, timedelta from typing import Dict, Any, Optional from fastapi import HTTPException, UploadFile + +from src.upload_limits import format_byte_limit, get_chat_upload_max_bytes + + def secure_filename(filename: str) -> str: """Sanitize a filename (replaces werkzeug.utils.secure_filename).""" import unicodedata @@ -73,7 +77,7 @@ class UploadHandler: def __init__(self, base_dir: str, upload_dir: str): self.base_dir = base_dir self.upload_dir = upload_dir - self.max_upload_size = 10 * 1024 * 1024 # 10MB + self.max_upload_size = get_chat_upload_max_bytes() self.max_concurrent_uploads = 3 self.cleanup_days = 30 # Per-IP per-minute cap. 
def read_byte_limit_env(name: str, default: int) -> int:
    """Read a positive integer byte limit from the environment.

    Args:
        name: Environment variable to read.
        default: Value returned when the variable is unset or blank.

    Returns:
        The parsed limit, or ``default`` when nothing usable is configured.

    Raises:
        ValueError: If the value is not an integer, or is smaller than 1.
    """
    configured = os.getenv(name)
    # Unset and whitespace-only values both mean "use the default".
    if configured is not None and configured.strip():
        try:
            parsed = int(configured)
        except ValueError as exc:
            raise ValueError(f"{name} must be an integer byte count") from exc
        if parsed < 1:
            raise ValueError(f"{name} must be greater than 0")
        return parsed
    return default


def get_chat_upload_max_bytes() -> int:
    """Return the chat upload cap, honoring its environment override."""
    return read_byte_limit_env(
        name=CHAT_UPLOAD_MAX_BYTES_ENV,
        default=DEFAULT_CHAT_UPLOAD_MAX_BYTES,
    )
+GALLERY_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES", 100 * 1024 * 1024 +) +GALLERY_TRANSFORM_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +MEMORY_IMPORT_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_MEMORY_IMPORT_MAX_BYTES", 10 * 1024 * 1024 +) +PERSONAL_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +EMAIL_COMPOSE_UPLOAD_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES", 25 * 1024 * 1024 +) +STT_MAX_AUDIO_BYTES = read_byte_limit_env( + "ODYSSEUS_STT_MAX_AUDIO_BYTES", 25 * 1024 * 1024 +) +ICS_MAX_BYTES = read_byte_limit_env( + "ODYSSEUS_ICS_MAX_BYTES", 10 * 1024 * 1024 +) + + async def read_upload_limited(upload: UploadFile, limit: int, label: str = "Upload") -> bytes: """Read an UploadFile with a hard byte cap.""" data = await upload.read(limit + 1) diff --git a/src/webhook_manager.py b/src/webhook_manager.py index e43f8e4ed..267ceaa38 100644 --- a/src/webhook_manager.py +++ b/src/webhook_manager.py @@ -136,11 +136,62 @@ def validate_events(events_str: str) -> str: return ",".join(events) +# Broad candidate matcher for the IP-redaction pass. Deliberately loose: a +# bracketed host authority ([fe80::1%eth0]:8080 and friends) with an optional +# :port, or a bare IPv6 run — hex groups joined by colons, an optional trailing +# dotted-quad for IPv4-mapped forms (::ffff:192.168.0.1), and an optional %zone. +# It does NOT encode the IPv6 grammar; ipaddress.ip_address() is the real +# validator (see _redact_ip_candidate), so any colon-bearing string it rejects +# (clock times, MACs, "std::vector") is left alone. Every branch is a single +# greedy class or a repetition over a mandatory ':'/'.' delimiter, so there is no +# nested-quantifier backtracking (ReDoS-safe). +_IP_CANDIDATE = re.compile( + r'\[[^\[\]\s]*\](?::\d+)?' + r'|(? 
def sanitize_error(error: str, max_len: int = 200) -> str:
    """Scrub addresses and URL hosts from an error message, then truncate it.

    Args:
        error: Raw error text.
        max_len: Maximum length of the returned string.

    Returns:
        The scrubbed text, cut to at most ``max_len`` characters.
    """
    # IPv6 / bracketed-authority candidates go first so an IPv4-mapped form such
    # as ::ffff:192.168.0.1 is replaced as a single unit rather than having its
    # embedded IPv4 removed and a stray "::ffff:" left behind.
    scrubbed = _IP_CANDIDATE.sub(_redact_ip_candidate, error)
    # Then, in order: remaining bare IPv4 addresses (with optional port), and
    # the scheme+host part of URLs.
    for pattern, placeholder in (
        (r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}(:\d+)?', '[redacted]'),
        (r'https?://[^\s/]+', '[redacted-url]'),
    ):
        scrubbed = re.sub(pattern, placeholder, scrubbed)
    return scrubbed[:max_len]
Stop what's using it, or pick another port:" - echo " ODYSSEUS_PORT=7900 ./start-macos.sh" - exit 1 + echo "✗ Port $PORT is already in use on $PROBE_HOST. Stop what's using it, or pick another port:" + echo " ODYSSEUS_PORT=7900 ./start-macos.sh" + exit 1 fi # 1. Homebrew — the macOS package manager. We can't safely auto-install it # (it wants its own interactive confirmation), so point the user at it. if ! command -v brew >/dev/null 2>&1; then - echo - echo "Homebrew is required but not installed. Install it (one command), then re-run this script:" - echo ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"' - echo - echo "More info: https://brew.sh" - exit 1 + echo + echo "Homebrew is required but not installed. Install it (one command), then re-run this script:" + echo ' /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"' + echo + echo "More info: https://brew.sh" + exit 1 fi # 2. Find a Python 3.11+ to build the environment with. @@ -72,15 +72,15 @@ fi # (or non-mac) we just use whatever Python 3.11+ is on PATH. 
PY="" if [ "$(uname -m)" = "arm64" ]; then - cands="/opt/homebrew/bin/python3.13 /opt/homebrew/bin/python3.12 /opt/homebrew/bin/python3.11" + cands="/opt/homebrew/bin/python3.13 /opt/homebrew/bin/python3.12 /opt/homebrew/bin/python3.11" else - cands="python3 python3.13 python3.12 python3.11" + cands="python3 python3.13 python3.12 python3.11" fi for cand in $cands; do - p="$(command -v "$cand" 2>/dev/null)" || continue - if "$p" -c 'import sys; raise SystemExit(0 if sys.version_info[:2] >= (3, 11) else 1)' 2>/dev/null; then - PY="$p"; break - fi + p="$(command -v "$cand" 2>/dev/null)" || continue + if "$p" -c 'import sys; raise SystemExit(0 if sys.version_info[:2] >= (3, 11) else 1)' 2>/dev/null; then + PY="$p"; break + fi done # System dependencies (each installed only if missing, so re-runs stay fast and @@ -98,54 +98,62 @@ done # Install a Homebrew formula only if its command isn't already present. A failed # install warns but does not abort — Cookbook can be set up later. brew_ensure() { - if command -v "$1" >/dev/null 2>&1; then - echo " ✓ $2 already installed" - return 0 - fi - echo " installing $2…" - if ! brew install "$2"; then - echo " ⚠ Couldn't install $2 right now — Cookbook (local model serving) may be limited." - echo " You can install it later with: brew install $2" - fi + if command -v "$1" >/dev/null 2>&1; then + echo " ✓ $2 already installed" + return 0 + fi + echo " installing $2…" + if ! brew install "$2"; then + echo " ⚠ Couldn't install $2 right now — Cookbook (local model serving) may be limited." 
+ echo " You can install it later with: brew install $2" + fi } echo "▶ Checking dependencies (Homebrew)…" if [ -n "$PY" ]; then - echo " (using $("$PY" --version 2>&1) at $PY)" + echo " (using $("$PY" --version 2>&1) at $PY)" else - echo " installing python@3.11…" - brew install python@3.11 || true - PY="$(command -v /opt/homebrew/bin/python3.11 || command -v python3.11 || true)" + echo " installing python@3.11…" + brew install python@3.11 || true + PY="$(command -v /opt/homebrew/bin/python3.11 || command -v python3.11 || true)" fi brew_ensure tmux tmux brew_ensure llama-server llama.cpp +brew_ensure apfel apfel if [ -z "$PY" ] || [ ! -x "$PY" ]; then - echo "✗ Couldn't find a Python 3.11+ to build the environment with." - echo " Check: ls /opt/homebrew/bin/python3* (or install one: brew install python@3.11)" - exit 1 + echo "✗ Couldn't find a Python 3.11+ to build the environment with." + echo " Check: ls /opt/homebrew/bin/python3* (or install one: brew install python@3.11)" + exit 1 fi # 3. Python environment + dependencies (kept inside the repo, in venv/). # Named `venv` to match the manual steps and build-macos-app.sh, so the # clickable .app reuses this same environment. if [ ! -d venv ]; then - echo "▶ Creating Python environment…" - "$PY" -m venv venv + echo "▶ Creating Python environment…" + "$PY" -m venv venv fi VENV_PY="./venv/bin/python3" -echo "▶ Installing Python packages (first run downloads a few — can take a few minutes)…" -"$VENV_PY" -m pip install --quiet --upgrade pip -# Not --quiet: this is the slow step, so show progress (and any real errors). -"$VENV_PY" -m pip install -r requirements.txt +REQ_HASH="$(md5 -q requirements.txt 2>/dev/null || md5sum requirements.txt | cut -d' ' -f1)" +REQ_HASH_FILE="venv/.requirements_hash" +if [ ! 
-f "$REQ_HASH_FILE" ] || [ "$REQ_HASH" != "$(cat "$REQ_HASH_FILE" 2>/dev/null)" ]; then + echo "▶ Installing Python packages (first run downloads a few — can take a few minutes)…" + "$VENV_PY" -m pip install --quiet --upgrade pip + # Not --quiet: this is the slow step, so show progress (and any real errors). + "$VENV_PY" -m pip install -r requirements.txt + echo "$REQ_HASH" > "$REQ_HASH_FILE" +else + echo "▶ Python packages up to date — skipping install" +fi # chromadb-client (HTTP-only) conflicts with the full chromadb package. If # it got installed (e.g., from an older requirements-optional.txt), remove # it to prevent ChromaDB from silently failing in HTTP-only mode. if "$VENV_PY" -m pip show chromadb-client >/dev/null 2>&1; then - echo "▶ Cleaning up conflicting chromadb-client package…" - "$VENV_PY" -m pip uninstall -y chromadb-client - "$VENV_PY" -m pip install --force-reinstall chromadb + echo "▶ Cleaning up conflicting chromadb-client package…" + "$VENV_PY" -m pip uninstall -y chromadb-client + "$VENV_PY" -m pip install --force-reinstall chromadb fi # 4. First-run setup: creates data dirs and prints an initial admin password @@ -154,19 +162,39 @@ fi echo "▶ Preparing Odysseus…" ODYSSEUS_SKIP_RUN_HINT=1 ./venv/bin/python setup.py +# Local provider bootstrap. +# On Apple Silicon macOS, Apfel is treated as a sibling local model server +# to Ollama: if Homebrew has it installed, we start its OpenAI-compatible +# server on the port next to Ollama, since the default port is 11434 and that's busy (because of ollama). +MACHINE_ARCH="$(uname -m)" +APFEL_PID="" +if [ "$MACHINE_ARCH" = "arm64" ]; then + if command -v apfel >/dev/null 2>&1; then + APFEL_LOG="${TMPDIR:-/tmp}/odysseus-apfel.log" + echo "▶ Starting Apfel server in the background on port 11435…" + echo " logging to $APFEL_LOG" + nohup apfel --serve --port 11435 >"$APFEL_LOG" 2>&1 & + APFEL_PID=$! + else + echo "▶ Apfel is not installed (brew formula missing); skipping Apfel server bootstrap." 
+ fi +else + echo "▶ Non-ARM macOS detected; skipping Apfel server bootstrap." +fi + # 5. Launch. Bind to loopback by default; opt into LAN/Tailscale with # ODYSSEUS_HOST=0.0.0.0. URL_HOST="$HOST" if [ "$URL_HOST" = "0.0.0.0" ] || [ "$URL_HOST" = "::" ]; then - URL_HOST="127.0.0.1" + URL_HOST="127.0.0.1" fi URL="http://$URL_HOST:$PORT" TAILSCALE_URL="" if [ "$HOST" = "0.0.0.0" ] && command -v tailscale >/dev/null 2>&1; then - TS_IP="$(tailscale ip -4 2>/dev/null | head -n 1 || true)" - if [ -n "$TS_IP" ]; then - TAILSCALE_URL="http://$TS_IP:$PORT" - fi + TS_IP="$(tailscale ip -4 2>/dev/null | head -n 1 || true)" + if [ -n "$TS_IP" ]; then + TAILSCALE_URL="http://$TS_IP:$PORT" + fi fi # Open the browser automatically once the server is accepting connections — so @@ -175,33 +203,33 @@ fi # ODYSSEUS_NO_OPEN=1 (e.g. over SSH / headless). POLLER_PID="" if [ -z "$ODYSSEUS_NO_OPEN" ] && command -v open >/dev/null 2>&1; then - ( - for _ in $(seq 1 90); do - if (exec 3<>"/dev/tcp/$PROBE_HOST/$PORT") 2>/dev/null; then - printf '\n' - printf ' ┌────────────────────────────────────────────┐\n' - printf ' │ ✓ Odysseus is ready — opening your browser │\n' - printf ' │ %-40s │\n' "$URL" - printf ' │ (Press Ctrl+C in this window to stop) │\n' - printf ' └────────────────────────────────────────────┘\n\n' - open "$URL" - break - fi - sleep 1 - done - ) & - POLLER_PID=$! + ( + for _ in $(seq 1 90); do + if (exec 3<>"/dev/tcp/$PROBE_HOST/$PORT") 2>/dev/null; then + printf '\n' + printf ' ┌────────────────────────────────────────────┐\n' + printf ' │ ✓ Odysseus is ready — opening your browser │\n' + printf ' │ %-40s │\n' "$URL" + printf ' │ (Press Ctrl+C in this window to stop) │\n' + printf ' └────────────────────────────────────────────┘\n\n' + open "$URL" + break + fi + sleep 1 + done + ) & + POLLER_PID=$! fi # Setup is done — drop the setup-failure handler, and clean up the background # opener when the server exits or the user presses Ctrl+C. 
trap - ERR -trap '[ -n "$POLLER_PID" ] && kill "$POLLER_PID" 2>/dev/null' EXIT INT TERM +trap '[ -n "$POLLER_PID" ] && kill "$POLLER_PID" 2>/dev/null; [ -n "$APFEL_PID" ] && kill "$APFEL_PID" 2>/dev/null' EXIT INT TERM echo echo "▶ Starting Odysseus — it will open in your browser at $URL" if [ -n "$TAILSCALE_URL" ]; then - echo " Tailscale/LAN URL: $TAILSCALE_URL" + echo " Tailscale/LAN URL: $TAILSCALE_URL" fi echo " (this takes a few seconds; press Ctrl+C here to stop)" echo diff --git a/static/app.js b/static/app.js index 08ab12161..8216d6485 100644 --- a/static/app.js +++ b/static/app.js @@ -1555,6 +1555,7 @@ function initializeEventListeners() { const MODE_TOOLS = [ { btnId: 'web-toggle-btn', checkboxId: 'web-toggle', stateKey: 'web' }, { btnId: 'bash-toggle-btn', checkboxId: 'bash-toggle', stateKey: 'bash' }, + { btnId: 'plan-toggle-btn', checkboxId: 'plan-toggle', stateKey: 'plan' }, ]; function _modeKey(stateKey, mode) { return `${stateKey}_${mode}`; } @@ -1563,6 +1564,9 @@ function initializeEventListeners() { const state = loadToggleState(); const key = _modeKey(stateKey, mode); if (Object.prototype.hasOwnProperty.call(state, key)) return !!state[key]; + // Plan mode is opt-in: never default it on, otherwise every agent turn + // would be forced into planning. 
+ if (stateKey === 'plan') return false; return mode === 'agent'; // default: ON in agent, OFF in chat } @@ -1575,6 +1579,7 @@ function initializeEventListeners() { const TOOL_TOGGLE_TOAST_LABELS = { web: 'Web search', bash: 'Shell', + plan: 'Plan mode', }; function showToolToggleToast(stateKey, active) { @@ -1586,7 +1591,15 @@ function initializeEventListeners() { function applyModeToToggles(mode) { MODE_TOOLS.forEach(({ btnId, checkboxId, stateKey }) => { const btn = el(btnId); - if (!btn || btn.style.display === 'none') return; + if (!btn) return; + // Bash and plan are agent-only: hide them in chat mode, tagging the hide so only it is undone later. + if (mode === 'chat' && (stateKey === 'bash' || stateKey === 'plan')) { + if (btn.style.display !== 'none') { btn.style.display = 'none'; btn.dataset.modeHidden = '1'; } + return; + } + // Undo only a hide applied for chat mode; a button hidden by anything else stays hidden and is skipped. + if (btn.dataset.modeHidden) { btn.style.display = ''; delete btn.dataset.modeHidden; } + if (btn.style.display === 'none') return; const on = loadToolPref(stateKey, mode); btn.classList.toggle('active', on); if (checkboxId) { const chk = el(checkboxId); if (chk) chk.checked = on; } @@ -1601,6 +1614,14 @@ function initializeEventListeners() { const state = loadToggleState(); let currentMode = state.mode || 'chat'; + // Immediately hide bash/plan buttons in chat mode on page load (tagged so agent mode can undo it) + if (currentMode === 'chat') { + const bashBtn = el('bash-toggle-btn'); + const planBtn = el('plan-toggle-btn'); + if (bashBtn && bashBtn.style.display !== 'none') { bashBtn.style.display = 'none'; bashBtn.dataset.modeHidden = '1'; } + if (planBtn && planBtn.style.display !== 'none') { planBtn.style.display = 'none'; planBtn.dataset.modeHidden = '1'; } + } + function setMode(mode) { currentMode = mode; const st = loadToggleState(); @@ -1688,6 +1709,81 @@ function initializeEventListeners() { } setupToggle('web-toggle-btn', 'web-toggle', 'web'); setupToggle('bash-toggle-btn', 'bash-toggle', 'bash'); + // Workspace init is not repeated here: the existing call after the plan-menu setup still runs once. + setupToggle('plan-toggle-btn', 'plan-toggle', 'plan'); + + // Set plan mode on/off directly (checkbox + button state + saved pref) WITHOUT + // going through the button's click handler — used by the plan menu and by the + // "Approve & Run" flow. 
Going through .click() would hit the plan-menu + // intercept below (a stored plan re-opens the menu instead of toggling), which + // is exactly the bug that left approved plans stuck in plan mode. + function _setPlanMode(on) { + const btn = el('plan-toggle-btn'); + const chk = el('plan-toggle'); + const mode = (loadToggleState().mode) || 'chat'; + if (chk) chk.checked = !!on; + if (btn) { btn.classList.toggle('active', !!on); btn.setAttribute('aria-pressed', String(!!on)); } + saveToolPref('plan', mode, !!on); + } + window._setPlanMode = _setPlanMode; + + // ── Plan-button menu ── + // When a plan exists for this chat, clicking the plan button opens a small + // menu (Show plan / Plan mode on-off) instead of plain-toggling — so the plan + // window can be re-opened and docked at any time while the agent works. With + // no plan, the button behaves as before (one-click toggle). + (function initPlanMenu() { + const planBtn = el('plan-toggle-btn'); + if (!planBtn) return; + const _hasPlan = () => { try { return !!(window._getStoredPlan && window._getStoredPlan()); } catch (_) { return false; } }; + const _close = () => { const m = document.getElementById('plan-menu'); if (m) m.remove(); }; + function _open() { + _close(); + const planChk = el('plan-toggle'); + const on = !!(planChk && planChk.checked); + const menu = document.createElement('div'); + menu.id = 'plan-menu'; + menu.className = 'overflow-menu plan-menu'; + menu.innerHTML = + '' + + ''; + document.body.appendChild(menu); + const r = planBtn.getBoundingClientRect(); + menu.style.position = 'fixed'; + menu.style.left = Math.round(r.left) + 'px'; + menu.style.top = Math.round(r.top - menu.offsetHeight - 6) + 'px'; + menu.querySelector('[data-act="show"]').addEventListener('click', () => { + _close(); + const txt = window._getStoredPlan ? 
window._getStoredPlan() : ''; + if (txt && window.planWindowModule) window.planWindowModule.openPlanWindow(txt, null); + }); + menu.querySelector('[data-act="toggle"]').addEventListener('click', () => { + _close(); + _setPlanMode(!on); // flip state directly (no click → no menu re-open) + }); + // Dismiss on any outside click (capture so it beats other handlers) / Escape. + setTimeout(() => { + const off = (e) => { + if (!menu.contains(e.target) && e.target !== planBtn) { + _close(); document.removeEventListener('click', off, true); document.removeEventListener('keydown', esc, true); + } + }; + const esc = (e) => { if (e.key === 'Escape') { _close(); document.removeEventListener('click', off, true); document.removeEventListener('keydown', esc, true); } }; + document.addEventListener('click', off, true); + document.addEventListener('keydown', esc, true); + }, 0); + } + planBtn.addEventListener('click', (e) => { + // With a stored plan, the button opens the menu (Show plan / toggle). + // Without one, it falls through to the normal one-click toggle. + if (_hasPlan()) { e.preventDefault(); e.stopImmediatePropagation(); _open(); } + }, true); // capture phase: intercept before setupToggle's bubble handler + })(); + try { workspaceModule.initWorkspace(); } catch (_) {} // Document editor toggle (special: uses module panel, not a checkbox) @@ -2417,7 +2513,7 @@ function initializeEventListeners() { }; // Keys hidden by default on first run (no localStorage yet) - const UI_VIS_DEFAULT_OFF = new Set(['models-section', 'rag-toggle-btn']); + const UI_VIS_DEFAULT_OFF = new Set(['models-section', 'rag-toggle-btn', 'text-emojis']); // Keys that need admin to toggle off (reserved for future use) const UI_VIS_ADMIN_ONLY = new Set([]); @@ -2445,11 +2541,9 @@ function initializeEventListeners() { document.querySelectorAll('.section[draggable]').forEach(el => { el.setAttribute('draggable', dragEnabled ? 'true' : 'false'); }); - // Text-only emojis toggle. 
Default is ON (the checkbox defaults to - // checked because text-emojis isn't in UI_VIS_DEFAULT_OFF), so treat - // an absent value as enabled — otherwise the toggle looked on at - // startup but the effect only activated after the user flipped it. - applyTextEmojis(state['text-emojis'] !== false); + // Text-only emojis toggle. Default is OFF so model-emitted shortcodes + // like `:blush:` render through the normal monochrome emoji path. + applyTextEmojis(state['text-emojis'] === true); // Hide thinking sections toggle (show-thinking: checked=show, unchecked=hide) document.body.classList.toggle('hide-thinking', state['show-thinking'] === false); } diff --git a/static/index.html b/static/index.html index a4637d350..522129fe9 100644 --- a/static/index.html +++ b/static/index.html @@ -307,13 +307,22 @@ Add a memory — e.g. 'I prefer concise replies' +

Add Skill

-

Create a skill by hand — title, what it solves, and an approach.

+

Import a skill from GitHub or skills.sh (folder with SKILL.md and optional templates).

+
+
+ + Import URL — e.g. GitHub tree link to a skill folder +
+ +
+

Or create a skill by hand — title, what it solves, and an approach.

Title — short name, e.g. “build-vllm-wheel” @@ -331,7 +340,7 @@ Tags — comma-separated, e.g. python, build, vllm
- +
@@ -1075,6 +1084,12 @@ + +
+
diff --git a/static/js/admin.js b/static/js/admin.js index 5019096af..e4a39adf3 100644 --- a/static/js/admin.js +++ b/static/js/admin.js @@ -5,6 +5,7 @@ import uiModule from './ui.js'; import settingsModule from './settings.js'; import { providerLogo } from './providers.js'; import { sortModelObjects } from './modelSort.js'; +import { PROVIDER_DEVICE_FLOWS, formatDeviceFlowError, runProviderDeviceFlow } from './providerDeviceFlow.js'; let initialized = false; let modalEl = null; @@ -87,8 +88,12 @@ async function loadUsers() { `; // Allowed models — checkbox list - const allowedSet = new Set((u.privileges && u.privileges.allowed_models) || []); - const allEmpty = allowedSet.size === 0; + const allowedModels = Array.isArray(u.privileges && u.privileges.allowed_models) + ? u.privileges.allowed_models + : []; + const allowedSet = new Set(allowedModels); + const modelsRestricted = !!(u.privileges && u.privileges.allowed_models_restricted); + const blockAllModels = !!(u.privileges && u.privileges.block_all_models); html += `
Allowed models @@ -97,7 +102,7 @@ async function loadUsers() { None
-
${allEmpty ? 'All models allowed (no restrictions)' : allowedSet.size + ' model(s) allowed'}
+
${blockAllModels ? 'No models allowed' : (!modelsRestricted ? 'All models allowed (no restrictions)' : (allowedSet.size === 0 ? 'No models allowed' : allowedSet.size + ' model(s) allowed'))}
Loading models...
@@ -119,7 +124,7 @@ async function loadUsers() { // Load models list on first expand if (!_modelsLoaded && !privPanel.classList.contains('hidden')) { _modelsLoaded = true; - _loadModelsForUser(u.username, allowedSet, privPanel); + _loadModelsForUser(u.username, allowedSet, modelsRestricted, blockAllModels, privPanel); } }); @@ -199,26 +204,32 @@ async function loadUsers() { } catch (e) { list.innerHTML = '
Failed to load users
'; } } -async function _loadModelsForUser(username, allowedSet, privPanel) { +async function _loadModelsForUser(username, allowedSet, modelsRestricted, blockAllModels, privPanel) { const listEl = privPanel.querySelector(`.priv-models-list[data-user="${username}"]`); if (!listEl) return; try { - const res = await fetch('/api/models', { credentials: 'same-origin' }); + // Use /api/model-endpoints rather than /api/models — the latter is + // backed by `cached_models`, so endpoints that haven't been probed yet + // (e.g. a freshly-added cloud API like DeepSeek) simply don't show up + // until some other endpoint happens to trigger a cache refresh. The + // endpoints listing always reflects every configured endpoint. + const res = await fetch('/api/model-endpoints', { credentials: 'same-origin' }); const data = await res.json(); const allModels = []; - (data.items || []).forEach(item => { - if (item.offline) return; - (item.models || []).forEach(mid => { - allModels.push({ mid, epName: item.endpoint_name || '', display: mid.split('/').pop() }); + (Array.isArray(data) ? data : []).forEach(ep => { + if (!ep.online) return; + (ep.models || []).forEach(mid => { + allModels.push({ mid, epName: ep.name || '', display: mid.split('/').pop() }); }); }); if (!allModels.length) { listEl.innerHTML = 'No models available'; return; } - const allEmpty = allowedSet.size === 0; + let restricted = modelsRestricted; + let blockAll = blockAllModels; listEl.innerHTML = sortModelObjects(allModels).map(m => { - const checked = allEmpty || allowedSet.has(m.mid) ? 'checked' : ''; + const checked = !blockAll && (!restricted || allowedSet.has(m.mid)) ? 'checked' : ''; return `