Merge remote-tracking branch 'origin/dev' into test-main-dev-merge-20260615

# Conflicts:
#	src/tool_implementations.py
#	static/js/research/panel.js
This commit is contained in:
pewdiepie-archdaemon
2026-06-15 21:20:15 +09:00
312 changed files with 20047 additions and 2952 deletions
+6
View File
@@ -10,6 +10,12 @@ dist/
build/
.env
.env.bak.*
# Secrets: keep plaintext and every transient secrets.env variant out of
# the build context. If an encrypted secrets.env is used, it is mounted
# at runtime — never baked into the image. Mirrored in .gitignore.
secrets.env
secrets.env.*
!secrets.env.example
/data/
/logs/
.git/
+7
View File
@@ -190,3 +190,10 @@ SEARXNG_INSTANCE=http://localhost:8080
# These overlays only expose the GPU devices. The slim Odysseus image
# still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,
# llama-cpp-python, etc.) before models can actually serve on GPU.
# ============================================================
# Storage Paths (Docker Compose)
# ============================================================
# APP_DATA_DIR=./data
# APP_LOGS_DIR=./logs
+9
View File
@@ -0,0 +1,9 @@
# Code owners.
#
# Intentionally empty for now. The catch-all rule that mapped every path to a
# single owner froze all merges the moment "Require review from Code Owners"
# was enabled, because no other maintainer's approval could satisfy the gate.
# A per-area ownership map (security/auth, CI, frontend, agent internals, with
# multiple named owners per line) is being worked out in issue #593; once
# agreed it replaces this file. Until then, required reviews and the security
# CI gate (docs/security-ci.md) remain in force via branch protection.
+48
View File
@@ -0,0 +1,48 @@
# Dependabot keeps dependencies and pinned action versions current.
#
# Why this matters for security: every workflow in this repo pins its GitHub
# Actions to an exact commit (a SHA), which is safe but freezes them in time.
# Dependabot opens a small, reviewable pull request whenever a newer version
# exists -- for Python packages, npm packages, the Docker base image, and the
# pinned Actions themselves -- so staying patched does not require manual work.
# Updates are grouped so a week's bumps arrive as one PR per ecosystem, not a
# flood of separate ones.
version: 2
updates:
# Python dependencies (requirements.txt + requirements-optional.txt).
- package-ecosystem: pip
directory: "/"
schedule:
interval: weekly
open-pull-requests-limit: 5
groups:
python:
patterns: ["*"]
# Frontend / tooling npm packages (package.json).
- package-ecosystem: npm
directory: "/"
schedule:
interval: weekly
open-pull-requests-limit: 5
groups:
npm:
patterns: ["*"]
# The pinned action SHAs used across .github/workflows.
- package-ecosystem: github-actions
directory: "/"
schedule:
interval: weekly
open-pull-requests-limit: 5
groups:
actions:
patterns: ["*"]
# The Docker base image in the Dockerfile.
- package-ecosystem: docker
directory: "/"
schedule:
interval: weekly
open-pull-requests-limit: 5
+123
View File
@@ -0,0 +1,123 @@
# Pull Request Review Template
Use this shape as a copyable reference for substantive PR reviews; GitHub does
not auto-apply this file to review comments. Omit sections that do not add
useful signal. Lead with confirmed findings; keep speculative notes out of the
public review unless they are framed as a concrete open question.
## Small PR Path
For narrow docs, typo, test-only, or obvious local fixes, a short review is
enough:
```md
LGTM after checking:
- scope:
- validation:
- residual risk:
```
Use the fuller structure below for larger, risky, multi-finding, or
security-sensitive reviews.
## Findings
**<sub><sub>![P2 Badge](https://img.shields.io/badge/P2-yellow?style=flat)</sub></sub> issue (test): Short issue title**
- **Problem:** Concrete broken flow, contract, input, or risk.
- **Impact:** Why this matters to users, CI, maintainers, data, security, or scale.
- **Ask:** Smallest practical correction or decision the author should make.
- **Location:** `path:line`
## Open Questions
- **question (scope, non-blocking): Short author question** Ask the concrete
intent, scope, or tradeoff question.
## Validation
- Ran:
- Not run:
- Residual risk:
## PR Hygiene
- Target/template/checks:
- Related, duplicate, or superseding context:
## No Findings Variant
```md
## Findings
none confirmed
## Validation
- Ran:
- Not run:
- Residual risk:
```
## Legend
- **Findings:** Verified, author-actionable issues that should be fixed or
consciously accepted before merge.
- **Priority badges:** The shields.io badges below are optional formatting for
priority labels. Plain `P0`, `P1`, `P2`, or `P3` text is also acceptable when
an external image dependency is undesirable or may not render.
- **P0:** `![P0 Badge](https://img.shields.io/badge/P0-red?style=flat)` -
release-blocking or actively dangerous.
- **P1:** `![P1 Badge](https://img.shields.io/badge/P1-orange?style=flat)` -
serious bug, security risk, data-loss risk, or broken primary flow.
- **P2:** `![P2 Badge](https://img.shields.io/badge/P2-yellow?style=flat)` -
meaningful correctness, test, maintainability, or edge-case issue.
- **P3:** `![P3 Badge](https://img.shields.io/badge/P3-lightgrey?style=flat)` -
minor polish or low-risk cleanup.
- **Intent labels:**
- **`issue`:** A confirmed defect, regression, broken contract, or concrete
risk.
- **`suggestion`:** A non-blocking improvement that would make the PR clearer,
safer, or easier to maintain.
- **`nit`:** A tiny, non-blocking cleanup or style note. Use it only when the
author can safely ignore it without changing the review outcome.
- **`question`:** A real author-facing clarification about intent, scope, or
tradeoffs. Do not use questions to hide an issue that should be stated
directly.
- **`LGTM`:** "Looks good to me." Use only when the review found no blocking
issues, or when any remaining notes are clearly optional.
- **Decorations:** Optional labels in parentheses that clarify the finding type,
scope, or merge impact.
- **`security`:** Auth, authorization, ownership, secrets, SSRF, injection,
unsafe external input, or other trust-boundary concerns.
- **`test`:** Missing, failing, misleading, brittle, or insufficient tests.
- **`scope`:** PR scope, feature boundaries, unrelated churn, or work that
should be split into a separate issue or PR.
- **`ci`:** CI configuration, workflow failures, flaky checks, or validation
signal quality.
- **`api`:** Route, request/response, public function, schema, persistence, or
integration contract changes.
- **`docs`:** User-facing docs, contributor docs, examples, or comments that
need to change with the code.
- **`non-blocking`:** Useful feedback that should not prevent merge by
itself.
- **Finding fields:**
- **Problem:** What is wrong, what contract is ambiguous, or what risk the PR
introduces.
- **Impact:** Why the problem matters in practical terms.
- **Ask:** The smallest concrete fix, test, or decision requested from the PR
author.
- **Location:** The most useful repo-relative file and line reference for the
finding, using `path:line`.
- **Optional sections:**
- **Open Questions:** Genuine scope or intent questions; omit when there are
no real questions.
- **Validation:** What the reviewer ran, what was intentionally not run, and
what risk remains after review.
- **PR Hygiene:** Target-branch, template, CI/check, duplicate, related-work,
or superseding-PR notes.
- **`none confirmed`:** Use only when no review-worthy findings were confirmed;
still list validation gaps or residual risk when relevant.
+6 -6
View File
@@ -19,10 +19,10 @@ jobs:
name: Python syntax (compileall)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: "3.11"
# Byte-compile sources — catches syntax errors without installing deps.
@@ -32,10 +32,10 @@ jobs:
name: JS syntax (node --check)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
with:
node-version: "20"
# Syntax-check our own JS (skip vendored libs in static/lib).
@@ -54,7 +54,7 @@ jobs:
# ROADMAP "fresh install smoke tests" item; make this required once green.
continue-on-error: true
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
fetch-depth: 0
persist-credentials: false
@@ -81,7 +81,7 @@ jobs:
echo "docs_only=false" >> "$GITHUB_OUTPUT"
fi
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
if: steps.docs-check.outputs.docs_only != 'true'
with:
python-version: "3.11"
+61
View File
@@ -0,0 +1,61 @@
# CodeQL code scanning
#
# Purpose: GitHub's own static analysis engine reads the application source
# (Python backend + the JavaScript frontend) and looks for real
# vulnerabilities -- SQL/command injection, path traversal, auth mistakes,
# unsafe deserialization. Findings appear in the repo's Security tab. This is
# the deepest check in the suite and the most valuable for a high-profile
# target.
#
# It runs on every push to main and on a weekly schedule (to catch newly
# disclosed query patterns against unchanged code). It deliberately does NOT
# run on pull requests: most PRs here come from forks, whose read-only token
# cannot publish results, which would produce confusing failures. To scan pull
# requests too, a maintainer can instead enable CodeQL "default setup" in
# Settings -> Security -> Code scanning (one toggle, no file needed) -- see
# docs/security-ci.md.
name: CodeQL
on:
push:
branches: [main]
schedule:
# Weekly, Monday 06:00 UTC.
- cron: '0 6 * * 1'
workflow_dispatch:
permissions: {}
concurrency:
group: codeql-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
analyze:
name: Analyze (${{ matrix.language }})
runs-on: ubuntu-latest
permissions:
contents: read
security-events: write # publish results to the Security tab
strategy:
fail-fast: false
matrix:
# Both are interpreted, so CodeQL needs no build step (build-mode none).
language: [python, javascript-typescript]
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
languages: ${{ matrix.language }}
build-mode: none
- name: Perform CodeQL analysis
uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
category: "/language:${{ matrix.language }}"
+52
View File
@@ -0,0 +1,52 @@
# Container security: Dockerfile lint
#
# Purpose: the Docker image is how most people run Odysseus, so it is part of
# the attack surface. hadolint lints the Dockerfile for mistakes and insecure
# patterns (running as root longer than needed, unpinned base image, bad apt
# usage). Blocking.
#
# The image vulnerability scan (Trivy, advisory) lives in its own file,
# container-trivy.yml. Keeping it separate lets that advisory scan be
# path-filtered and held to a read-only token on pull requests without
# weakening this blocking gate, which must always report so a required check
# never hangs.
#
# Note: a separate open PR (#120) proposes a local `scripts/scan_image.py`.
# This job is complementary -- it is a CI gate, not a script a contributor has
# to remember to run.
name: Container scan
on:
pull_request:
push:
branches: [main]
workflow_dispatch:
permissions: {}
concurrency:
group: container-scan-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
hadolint:
name: hadolint (Dockerfile lint)
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Lint Dockerfile
uses: hadolint/hadolint-action@2332a7b74a6de0dda2e2221d575162eba76ba5e5 # v3.3.0
with:
dockerfile: Dockerfile
# DL3008: pinning apt package versions is impractical on a -slim base
# image. Debian purges old package versions from its repos, so a
# pinned version breaks future rebuilds. The base image itself is
# what should be pinned (tracked by Dependabot's docker ecosystem).
ignore: DL3008
+125
View File
@@ -0,0 +1,125 @@
# Container image vulnerability scan (advisory)
#
# Trivy builds the application image and scans it for known-vulnerable OS and
# Python packages. Advisory only -- it reports findings to the repo's Security
# tab without blocking a merge, because the image inevitably contains
# already-known CVEs in upstream packages that are not this project's bug.
#
# Split from the Dockerfile lint (container-scan.yml) for two reasons:
#
# - Least privilege. The image build runs Dockerfile instructions, which on a
# pull request are attacker-influenceable. That path (the `scan` job) is
# held to a read-only token and never publishes results. Only `publish`,
# which runs on push to main (curated, fast-forwarded from reviewed dev),
# gets security-events:write to upload SARIF.
# - Cost. Docs-only changes do not rebuild the image (paths-ignore below),
# matching docker-publish.yml. hadolint stays on the broad trigger in
# container-scan.yml so the blocking gate always reports.
name: Container scan (Trivy)
on:
pull_request:
paths-ignore:
- '**.md'
- 'docs/**'
- '.github/ISSUE_TEMPLATE/**'
push:
branches: [main]
paths-ignore:
- '**.md'
- 'docs/**'
- '.github/ISSUE_TEMPLATE/**'
workflow_dispatch:
permissions: {}
concurrency:
group: container-trivy-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
# Pull requests and manual runs: build and scan under a read-only token.
# The build executes PR-supplied Dockerfile instructions, so this job must
# not hold any write scope, and it does not upload to the Security tab.
scan:
name: Trivy (image scan, advisory)
if: github.event_name != 'push'
runs-on: ubuntu-latest
# Advisory: a CVE in an upstream package must not block a PR.
continue-on-error: true
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Buildx
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
# Build without pushing so a broken Dockerfile is caught here, and the
# exact image we ship is what gets scanned.
- name: Build image
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
with:
context: .
push: false
load: true
tags: odysseus:ci
- name: Scan image with Trivy
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
with:
image-ref: odysseus:ci
format: table
ignore-unfixed: true
env:
# Pin the vuln DB source to GHCR to avoid rate-limited Docker Hub
# mirrors that flake on shared runners.
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
# Push to main only: build, scan, and publish SARIF to the Security tab.
# This is the only path that runs trusted code, so it is the only one granted
# security-events:write.
publish:
name: Trivy (image scan + SARIF upload)
if: github.event_name == 'push'
runs-on: ubuntu-latest
continue-on-error: true
permissions:
contents: read
security-events: write # upload SARIF to the Security tab
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Buildx
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
- name: Build image
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
with:
context: .
push: false
load: true
tags: odysseus:ci
- name: Scan image with Trivy
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
with:
image-ref: odysseus:ci
format: sarif
output: trivy-results.sarif
ignore-unfixed: true
env:
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
- name: Upload Trivy results
uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
with:
sarif_file: trivy-results.sarif
category: trivy-image
+71
View File
@@ -0,0 +1,71 @@
# Supply-chain review
#
# Purpose: defend against "side-chain" / supply-chain attacks -- a pull request
# that adds (or bumps) a dependency to a version with a known vulnerability or a
# disallowed license. Two layers:
#
# - dependency-review: runs ONLY on pull requests. It compares the
# dependencies before and after the PR and blocks the merge if the change
# pulls in a package with a known security advisory. This is the gate.
# - pip-audit: scans the project's current Python requirements against the
# advisory database. Advisory only (it never blocks a merge), because it can
# flag a pre-existing issue in an already-shipped dependency.
name: Dependency review
on:
pull_request:
push:
branches: [main]
workflow_dispatch:
# Default-deny token; jobs grant only read access.
permissions: {}
concurrency:
group: dependency-review-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
dependency-review:
name: dependency-review (PR gate)
# Only meaningful on a pull request -- it needs a base..head diff to review.
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Review dependency changes
uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0
with:
# Fail the PR on any newly introduced moderate-or-worse advisory.
fail-on-severity: moderate
pip-audit:
name: pip-audit (advisory)
runs-on: ubuntu-latest
# Advisory: report known-vulnerable Python deps without blocking the merge.
continue-on-error: true
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
- name: Run pip-audit on requirements
run: |
set -euo pipefail
pip install pip-audit==2.10.0
pip-audit -r requirements.txt -r requirements-optional.txt --strict
+60
View File
@@ -0,0 +1,60 @@
# Secret scanning
#
# Purpose: stop credentials (API keys, tokens, passwords, private keys) from
# ever living in the Git history. Odysseus deliberately keeps real secrets in
# files that are gitignored (.env, data/), but a slip in a future commit -- or a
# malicious pull request that sneaks one in -- would otherwise go unnoticed.
# This job reads the repository and the full commit history and fails if it
# finds anything that looks like a secret.
#
# It runs the official gitleaks BINARY directly (pinned to an exact version and
# verified against the project's published SHA-256 checksum) rather than the
# gitleaks GitHub Action, because the Action asks for a paid license on
# organization-owned repos. The binary is free and behaves identically.
name: Secret scan
on:
pull_request:
push:
branches: [main]
workflow_dispatch:
# Start with zero permissions; the single job opts back in to read-only.
permissions: {}
concurrency:
group: secret-scan-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
gitleaks:
name: gitleaks
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
# Full history so a secret committed in an earlier commit (and later
# deleted) is still caught -- deletion does not remove it from Git.
fetch-depth: 0
persist-credentials: false
# Pinned version + checksum so a tampered release binary cannot run here.
# Bump VERSION/SHA256 together; the checksum comes from the matching
# gitleaks_<version>_checksums.txt on the GitHub release.
- name: Run gitleaks (pinned, checksum-verified)
env:
GITLEAKS_VERSION: 8.30.1
GITLEAKS_SHA256: 551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb
run: |
set -euo pipefail
TARBALL="gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz"
curl -fsSL -o "${TARBALL}" \
"https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/${TARBALL}"
echo "${GITLEAKS_SHA256} ${TARBALL}" | sha256sum -c -
tar -xzf "${TARBALL}" gitleaks
# Scan the whole history. Findings print to the log and fail the job.
./gitleaks git --no-banner --redact --verbose .
+80
View File
@@ -0,0 +1,80 @@
# Workflow security (CI that audits the CI)
#
# Purpose: the GitHub Actions workflows themselves are an attack surface. A
# poorly written workflow can leak the repository token, run attacker-supplied
# code from a pull request, or pull in a tampered third-party action. These two
# tools check every workflow file in this repo for those mistakes:
#
# - actionlint: catches workflow syntax errors and shell-script bugs inside
# `run:` steps before they reach main.
# - zizmor: a security linter for Actions. Flags template-injection holes,
# unpinned actions, credential persistence, and over-broad token
# permissions -- exactly the patterns the rest of this CI is built to avoid.
#
# Add this early: it then audits every workflow added after it.
name: Workflow security
on:
pull_request:
push:
branches: [main]
workflow_dispatch:
# Default-deny token; each job grants only read access to the code.
permissions: {}
concurrency:
group: workflow-security-${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
actionlint:
name: actionlint
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
# Pinned version + checksum so a tampered binary cannot run here.
- name: Run actionlint (pinned, checksum-verified)
env:
ACTIONLINT_VERSION: 1.7.12
ACTIONLINT_SHA256: 8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8
run: |
set -euo pipefail
TARBALL="actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
curl -fsSL -o "${TARBALL}" \
"https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/${TARBALL}"
echo "${ACTIONLINT_SHA256} ${TARBALL}" | sha256sum -c -
tar -xzf "${TARBALL}" actionlint
./actionlint -color
zizmor:
name: zizmor (Actions SAST)
runs-on: ubuntu-latest
permissions:
contents: read
steps:
- name: Checkout repository
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.12'
# Pinned zizmor release. --offline keeps the audit hermetic (no network
# calls about the actions it inspects); --min-severity=low surfaces
# everything so nothing slips through under the gate.
- name: Run zizmor
run: |
set -euo pipefail
pip install zizmor==1.25.2
zizmor --offline --min-severity=low .github/workflows/
+13
View File
@@ -14,6 +14,15 @@ venv/
.env
.env.bak.*
!.env.example
# Local uv lockfile (optional, per-platform — see "Faster installs with uv" in README)
requirements.lock
# SOPS workflow — encrypted `secrets.env` is intentionally committable,
# but every variant (plaintext, manual decrypt copy, editor backup)
# must stay out of git. Mirrored in .dockerignore so the same artifacts
# also cannot enter image build layers.
secrets.env.*
!secrets.env.example
# Data — all user data stays local
data/
@@ -61,6 +70,9 @@ output.txt.txt
*.tiff
*.pdf
# …except shipped static assets
!static/icons/*.png
# …except shipped demo assets in docs/ that the README links to.
!docs/*.jpg
!docs/*.jpeg
@@ -89,3 +101,4 @@ docs/windows-port/
compound.config.json
*.error.log
_scratch/
/odysseus/
+1 -1
View File
@@ -1,4 +1,4 @@
FROM python:3.12-slim
FROM python:3.14-slim
# System deps. tmux is required by Cookbook for background downloads/serves.
# openssh-client is required for Cookbook remote server tests, setup, probes,
+34 -2
View File
@@ -12,6 +12,8 @@
A self-hosted AI workspace -- meant to be the self-hosted version of the UI experience you get from ChatGPT and Claude. But with more jank and fun. Running on your own hardware, with your own data -- local-first, privacy-first, and no trojan.
[![Packaging status](https://repology.org/badge/vertical-allrepos/odysseus-ai.svg)](https://repology.org/project/odysseus-ai/versions)
## Features
- **Chat** -- chat with any local model or API; adding them is super simple.<br> <sub>vLLM · llama.cpp · Ollama · OpenRouter · OpenAI · GitHub Copilot</sub>
- **Agent** -- hand it tools and let it run the whole task itself.<br> <sub>built on [opencode](https://github.com/anomalyco/opencode) · MCP · web · files · shell · skills · memory</sub>
@@ -73,6 +75,10 @@ binds the web UI to `127.0.0.1` by default. If the port is taken, set
`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
only when you intentionally want LAN/reverse-proxy access.
> **On Apple Silicon (M-series) Macs:** Docker can't reach the Metal GPU, so
> Cookbook serves local models on CPU only. For GPU-accelerated model serving,
> run natively instead — see [Apple Silicon](#apple-silicon) below.
### Native Linux / macOS
```bash
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
@@ -218,7 +224,7 @@ docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls
> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
> not a Docker passthrough failure. Re-install the serve engine via
> not a Docker passthrough failure. Reinstall the serve engine via
> **Cookbook → Dependencies** to get a CUDA-enabled build.
>
> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
@@ -329,10 +335,29 @@ To expose Odysseus on a local network or Tailscale with HTTPS:
| Package | Feature unlocked |
|---------|-----------------|
| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
| `duckduckgo-search` | DuckDuckGo as a search provider option. |
| `ddgs` | DuckDuckGo as a search provider option. |
| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
### Faster, reproducible installs with uv (optional)
[uv](https://docs.astral.sh/uv/) works as a drop-in replacement for the
venv + pip steps in the native install guides, no project changes are needed but this change results in faster installs along with a lockfile for reproducible environments. After [installing `uv`](https://docs.astral.sh/uv/getting-started/installation/), use:
```bash
uv venv venv --python 3.13
uv pip install -r requirements.txt
# then continue as usual: python setup.py, uvicorn, ...
```
`requirements.txt` is intentionally unpinned, so two installs at different times can produce different package versions. If you want a reproducible environment (e.g. across your own machines, or to roll back after a bad upgrade), snapshot and restore exact versions with:
```bash
uv pip compile requirements.txt -o requirements.lock # snapshot current resolution
uv pip sync requirements.lock # reproduce it exactly later
```
`requirements.lock` is gitignored and platform-specific (compile it on the OS you deploy to). Regenerate it deliberately when you want to take upgrades. The plain `uv pip install -r requirements.txt` keeps following the unpinned requirements like pip does.
### Outlook / Office 365 email
Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook
and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox
@@ -364,6 +389,7 @@ Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and th
4. Keep raw service and model ports internal-only.
Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
`ALLOWED_ORIGINS` lists exact permitted origins for cross-origin browser/API clients; ordinary same-origin reverse-proxy access usually does not need a special CORS entry.
Common internal-only ports from the default docs/compose setup:
@@ -395,8 +421,11 @@ Key settings:
| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
| `APP_DATA_DIR` | `./data` | Docker Compose host directory for application data volumes. |
| `APP_LOGS_DIR` | `./logs` | Docker Compose host directory for application logs. |
| `AUTH_ENABLED` | `true` | Enable/disable login |
| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
| `ALLOWED_ORIGINS` | `http://localhost,http://127.0.0.1` | Comma-separated exact permitted origins for cross-origin browser/API clients. |
| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
@@ -440,6 +469,9 @@ docs/ landing page (index.html) + preview clips
All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
To back up or restore everything in `data/`, see the
[Backup & Restore guide](docs/backup-restore.md).
## Star History
<a href="https://www.star-history.com/?repos=pewdiepie-archdaemon%2Fodysseus&type=date&legend=top-left">
+78 -17
View File
@@ -47,6 +47,7 @@ from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.middleware.gzip import GZipMiddleware
# Core imports
from core.constants import (
@@ -55,7 +56,7 @@ from core.constants import (
)
from core.database import SessionLocal, ApiToken
from core.middleware import SecurityHeadersMiddleware, is_cors_preflight
from core.auth import AuthManager
from core.auth import AuthManager, normalize_known_username
from core.exceptions import (
SessionNotFoundError, InvalidFileUploadError,
LLMServiceError, WebSearchError,
@@ -68,10 +69,37 @@ from src.generated_images import GENERATED_IMAGE_HEADERS, resolve_generated_imag
from starlette.responses import RedirectResponse
# ========= LOGGING =========
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
import logging.handlers
from core.constants import DATA_DIR
_root_logger = logging.getLogger()
_root_logger.setLevel(logging.INFO)
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Clear existing handlers to avoid duplicates
for _h in list(_root_logger.handlers):
_root_logger.removeHandler(_h)
_console_h = logging.StreamHandler()
_console_h.setFormatter(_formatter)
_root_logger.addHandler(_console_h)
try:
_log_dir = os.path.join(DATA_DIR, "logs")
os.makedirs(_log_dir, exist_ok=True)
_log_file = os.path.join(_log_dir, "app.log")
# RotatingFileHandler is not multi-process safe (e.g. if uvicorn is run with --workers N).
# Odysseus is single-process by convention, so this is acceptable, but be aware that
# concurrent log rotation issues can arise if multiple workers are configured.
_file_h = logging.handlers.RotatingFileHandler(
_log_file, maxBytes=5 * 1024 * 1024, backupCount=3, encoding="utf-8"
)
_file_h.setFormatter(_formatter)
_root_logger.addHandler(_file_h)
except Exception as e:
_root_logger.warning(f"Failed to initialize file logging handler (falling back to console-only): {e}")
logger = logging.getLogger(__name__)
# ========= APP =========
@@ -104,6 +132,16 @@ app.add_middleware(
],
)
# ========= RESPONSE COMPRESSION (gzip) =========
# The frontend's text assets (style.css, index.html, the JS bundles) shipped
# uncompressed on every cold load. gzip cuts CSS/JS/HTML by ~75-85% on the wire
# with no behavioural change. Starlette's GZipMiddleware excludes
# `text/event-stream` by default, so the SSE streams (chat, shell, research,
# model-probe — all served with media_type="text/event-stream") are never
# compressed or buffered; only complete bodies over minimum_size are. The
# security-header middleware composes cleanly on top.
app.add_middleware(GZipMiddleware, minimum_size=1024, compresslevel=6)
# ========= SECURITY HEADERS MIDDLEWARE =========
app.add_middleware(SecurityHeadersMiddleware)
@@ -129,6 +167,7 @@ _TIMEOUT_EXEMPT_PREFIXES = (
"/api/cookbook/setup", # remote pacman/apt installs
"/api/upload", # large files
"/api/image", # diffusion proxies (inpaint/harmonize/upscale/etc.) — own 120s httpx timeout
"/api/memory/audit", # retains own 120s LLM inactivity timeout
)
@@ -217,8 +256,16 @@ if AUTH_ENABLED:
try:
rows = db.query(ApiToken).filter(ApiToken.is_active == True).all()
for r in rows:
owner_key = normalize_known_username(auth_manager.users, getattr(r, "owner", None))
if not owner_key:
logger.warning(
"Ignoring active API token '%s' for unknown auth user '%s'",
getattr(r, "id", ""),
getattr(r, "owner", None),
)
continue
scopes = [s.strip() for s in (getattr(r, "scopes", "") or "chat").split(",") if s.strip()]
new_map[r.token_prefix].append((r.id, r.token_hash, getattr(r, "owner", None), scopes))
new_map[r.token_prefix].append((r.id, r.token_hash, owner_key, scopes))
finally:
db.close()
_token_cache.clear()
@@ -472,14 +519,20 @@ components = initialize_managers(BASE_DIR, rag_manager)
session_manager = components["session_manager"]
from src.assistant_log import set_session_manager as _set_asst_sm
_set_asst_sm(session_manager)
# Set the global session manager singleton (used by core.models.Session.add_message)
from core.models import set_session_manager_instance
set_session_manager_instance(session_manager)
app.state.session_manager = session_manager
memory_manager = components["memory_manager"]
memory_vector = components.get("memory_vector")
upload_handler = components["upload_handler"]
app.state.upload_handler = upload_handler
personal_docs_mgr = components["personal_docs_manager"]
api_key_manager = components["api_key_manager"]
preset_manager = components["preset_manager"]
chat_processor = components["chat_processor"]
research_handler = components["research_handler"]
app.state.research_handler = research_handler
chat_handler = components["chat_handler"]
model_discovery = components["model_discovery"]
skills_manager = components["skills_manager"]
@@ -573,7 +626,7 @@ app.include_router(setup_preset_routes(preset_manager))
# Diagnostics
from routes.diagnostics_routes import setup_diagnostics_routes
app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler))
app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler, memory_vector))
# Cleanup
from routes.cleanup_routes import setup_cleanup_routes
@@ -651,6 +704,9 @@ app.include_router(setup_shell_routes())
from routes.cookbook_routes import setup_cookbook_routes
app.include_router(setup_cookbook_routes())
from routes.workspace_routes import setup_workspace_routes
app.include_router(setup_workspace_routes())
# Hardware model fitting (cookbook "What Fits?" tab)
from routes.hwfit_routes import setup_hwfit_routes
app.include_router(setup_hwfit_routes())
@@ -923,16 +979,21 @@ async def _startup_event():
async def _warmup_endpoints():
try:
import httpx
endpoints = model_discovery.get_endpoints() if model_discovery else []
for ep in endpoints[:5]:
url = ep.get("url", "").replace("/chat/completions", "/models")
if url:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
await client.get(url)
logger.info(f"Warmup ping OK: {url}")
except Exception as e:
logger.debug(f"Warmup ping failed for endpoint: {e}")
# model_discovery has no get_endpoints(); that call raised
# AttributeError every run and silently disabled warmup/keepalive.
# Resolve the /models probe URLs via the real discovery API, off the
# event loop since discovery does a blocking port scan.
urls = (
await asyncio.to_thread(model_discovery.warmup_ping_urls)
if model_discovery else []
)
for url in urls:
try:
async with httpx.AsyncClient(timeout=5.0) as client:
await client.get(url)
logger.info(f"Warmup ping OK: {url}")
except Exception as e:
logger.debug(f"Warmup ping failed for endpoint: {e}")
except Exception as e:
logger.debug(f"Warmup ping skipped: {e}")
+129 -13
View File
@@ -3,6 +3,7 @@ Authentication module — multi-user password hashing, session tokens, config pe
Config stored in data/auth.json. Uses bcrypt directly.
"""
import enum
import json
import os
import secrets
@@ -67,6 +68,14 @@ TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days
RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"})
def normalize_known_username(users: Dict[str, Any], username: str | None) -> Optional[str]:
"""Return a normalized username only when it exists in the auth user map."""
key = str(username or "").strip().lower()
if not key or key not in users:
return None
return key
def _hash_password(password: str) -> str:
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
@@ -75,6 +84,15 @@ def _verify_password(password: str, hashed: str) -> bool:
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))
class SetAdminResult(enum.Enum):
"""Outcome of AuthManager.set_admin, so callers can map each case to a
precise response instead of guessing from a bare bool."""
OK = "ok"
USER_NOT_FOUND = "user_not_found"
NOT_AUTHORIZED = "not_authorized" # requester is not an admin
LAST_ADMIN = "last_admin" # would remove the last remaining admin
class AuthManager:
"""Manages multi-user password + session-token auth system."""
@@ -96,6 +114,7 @@ class AuthManager:
self._load()
self._load_sessions()
self._migrate_single_user()
self._drop_reserved_loaded_users()
self._migrate_legacy_admin_role()
def _load(self):
@@ -148,7 +167,13 @@ class AuthManager:
def _migrate_single_user(self):
"""Migrate old single-user format to multi-user format."""
if "password_hash" in self._config and "users" not in self._config:
old_user = self._config.get("username", "admin")
old_user = str(self._config.get("username", "admin") or "admin").strip().lower()
if old_user in RESERVED_USERNAMES:
logger.warning(
"Migrating legacy single-user reserved username '%s' to 'admin'",
old_user,
)
old_user = "admin"
old_hash = self._config["password_hash"]
self._config = {
"users": {
@@ -162,6 +187,30 @@ class AuthManager:
self._save()
logger.info(f"Migrated single-user auth to multi-user (admin: {old_user})")
def _drop_reserved_loaded_users(self):
"""Fail closed for legacy/manual auth rows that collide with sentinels."""
users = self._config.get("users")
if not isinstance(users, dict):
return
normalized = {}
removed = []
for username, data in users.items():
key = str(username or "").strip().lower()
if not key:
continue
if key in RESERVED_USERNAMES:
removed.append(key)
continue
normalized[key] = data
if removed or normalized != users:
self._config["users"] = normalized
self._save()
if removed:
logger.warning(
"Removed reserved username(s) from auth config: %s",
", ".join(sorted(set(removed))),
)
def _migrate_legacy_admin_role(self):
"""Normalize setup.py's old role='admin' marker to is_admin=True."""
changed = False
@@ -244,6 +293,22 @@ class AuthManager:
return False
if not self.users.get(requesting_user, {}).get("is_admin"):
return False
# Revoke API bearer tokens before removing the auth row. The bearer
# path authenticates from ApiToken rows and does not require the
# owner to still exist, so a successful delete must not leave active
# rows behind. If the token store is unavailable, fail closed and
# keep the user/session state intact so the admin can retry.
try:
from core.database import get_db_session, ApiToken
with get_db_session() as db:
removed_tokens = db.query(ApiToken).filter(ApiToken.owner == username).delete()
if removed_tokens:
logger.info(
f"Revoked {removed_tokens} API token(s) owned by deleted user '{username}'"
)
except Exception:
logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
return False
del self._config["users"][username]
self._save()
# Purge all sessions belonging to this user. validate_token doesn't
@@ -258,18 +323,6 @@ class AuthManager:
revoked += 1
if revoked:
self._save_sessions()
# Also revoke API bearer tokens owned by this user. The bearer auth
# path authenticates straight against ApiToken rows and never
# re-checks that the owner still exists, so leaving the rows behind
# would let a deleted user keep full API access indefinitely.
try:
from core.database import get_db_session, ApiToken
with get_db_session() as db:
removed = db.query(ApiToken).filter(ApiToken.owner == username).delete()
if removed:
logger.info(f"Revoked {removed} API token(s) owned by deleted user '{username}'")
except Exception:
logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
logger.info(f"Deleted user '{username}' (by {requesting_user}); revoked {revoked} active session(s)")
return True
@@ -344,6 +397,69 @@ class AuthManager:
logger.info(f"Updated privileges for '{username}': {current}")
return True
def set_admin(self, username: str, is_admin: bool,
requesting_user: str) -> SetAdminResult:
"""Promote/demote an existing user to/from admin. Admin only.
Refuses to remove the last remaining admin so the instance can never
be locked out of admin access; self-demotion is allowed as long as
another admin remains. Admin status is re-checked live on every
request, so unlike delete/rename no session or token revocation is
needed — a demoted admin simply fails the next is_admin() gate.
Promotion stashes the user's current privilege map and demotion
restores it, so a temporary admin stint can't silently broaden a
user's non-admin access; users without a stash (created as admin,
or promoted before stashing existed) demote to DEFAULT_PRIVILEGES.
Counting admins and flipping the flag happen in one critical section
so two concurrent demotions can't race the admin count to zero.
"""
username = (username or "").strip().lower()
requesting_user = (requesting_user or "").strip().lower()
is_admin = bool(is_admin)
with self._config_lock:
target = self._config.get("users", {}).get(username)
if target is None:
return SetAdminResult.USER_NOT_FOUND
if not self.users.get(requesting_user, {}).get("is_admin"):
return SetAdminResult.NOT_AUTHORIZED
currently_admin = bool(target.get("is_admin"))
if currently_admin == is_admin:
return SetAdminResult.OK # no-op; leave privileges untouched
if currently_admin and not is_admin:
admin_count = sum(1 for d in self.users.values() if d.get("is_admin"))
if admin_count <= 1:
return SetAdminResult.LAST_ADMIN
# Write order matters for lock-free readers: get_privileges()
# reads without _config_lock and trusts is_admin, so the admin
# flag must be flipped while the stored map is safe to expose —
# before writing admin privileges on promote, after restoring
# the pre-admin map on demote.
if is_admin:
target["is_admin"] = True
# Stash the pre-admin map so a later demotion can restore it.
# While is_admin is set the stored map is inert: get_privileges
# short-circuits to ADMIN_PRIVILEGES and set_privileges refuses
# admins, so only set_admin ever touches the stash.
target["privileges_before_admin"] = dict(
target.get("privileges") or DEFAULT_PRIVILEGES
)
target["privileges"] = dict(ADMIN_PRIVILEGES)
else:
# Restore the stashed pre-admin map. Fall back to defaults for
# users created as admins (their stored map is ADMIN_PRIVILEGES,
# which must not leak past demotion — e.g. can_use_bash) and
# for admins promoted before the stash existed.
target["privileges"] = dict(
target.pop("privileges_before_admin", None)
or DEFAULT_PRIVILEGES
)
target["is_admin"] = False
self._save()
logger.info("Set is_admin=%s for '%s' (by '%s')", is_admin, username, requesting_user)
return SetAdminResult.OK
def change_password(self, username: str, current_password: str, new_password: str) -> bool:
username = username.strip().lower()
if username not in self.users:
+194 -25
View File
@@ -688,6 +688,7 @@ def _migrate_add_last_message_at_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -713,10 +714,14 @@ def _migrate_add_last_message_at_column():
"ON sessions(archived, last_message_at)"
)
conn.commit()
conn.close()
logging.getLogger(__name__).info("Migrated: added + backfilled 'last_message_at' on sessions")
except Exception as e:
logging.getLogger(__name__).warning(f"last_message_at migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_document_archived_column():
"""Add `archived` to documents (soft-archive flag). Guarded + idempotent."""
@@ -724,6 +729,7 @@ def _migrate_add_document_archived_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(documents)")
@@ -732,9 +738,13 @@ def _migrate_add_document_archived_column():
conn.execute("ALTER TABLE documents ADD COLUMN archived BOOLEAN DEFAULT 0")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'archived' to documents")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"documents.archived migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_owner_column():
@@ -743,6 +753,7 @@ def _migrate_add_owner_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -752,9 +763,13 @@ def _migrate_add_owner_column():
conn.execute("CREATE INDEX IF NOT EXISTS ix_sessions_owner ON sessions(owner)")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'owner' column to sessions")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"Migration check failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_model_endpoints():
"""Recreate model_endpoints table if schema changed (url->base_url)."""
@@ -762,6 +777,7 @@ def _migrate_model_endpoints():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -770,9 +786,13 @@ def _migrate_model_endpoints():
conn.execute("DROP TABLE IF EXISTS model_endpoints")
conn.commit()
logging.getLogger(__name__).info("Migrated: dropped old model_endpoints table (schema change)")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"model_endpoints migration check failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_hidden_models_column():
"""Add hidden_models column to model_endpoints if it doesn't exist."""
@@ -780,6 +800,7 @@ def _migrate_add_hidden_models_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -788,9 +809,13 @@ def _migrate_add_hidden_models_column():
conn.execute("ALTER TABLE model_endpoints ADD COLUMN hidden_models TEXT")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'hidden_models' column to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"hidden_models migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_model_endpoint_owner_column():
"""Add owner column to model_endpoints if it doesn't exist.
@@ -805,6 +830,7 @@ def _migrate_add_model_endpoint_owner_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -814,9 +840,13 @@ def _migrate_add_model_endpoint_owner_column():
conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_owner ON model_endpoints(owner)")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'owner' column + index to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"model_endpoints.owner migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_provider_auth_id_column():
@@ -825,6 +855,7 @@ def _migrate_add_provider_auth_id_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -834,9 +865,13 @@ def _migrate_add_provider_auth_id_column():
conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_provider_auth_id ON model_endpoints(provider_auth_id)")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'provider_auth_id' column + index to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"model_endpoints.provider_auth_id migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_model_type_column():
@@ -845,6 +880,7 @@ def _migrate_add_model_type_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -853,9 +889,13 @@ def _migrate_add_model_type_column():
conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_type TEXT DEFAULT 'llm'")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'model_type' column to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"model_type migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_model_endpoint_refresh_columns():
"""Add endpoint classification / refresh policy columns if missing."""
@@ -863,6 +903,7 @@ def _migrate_add_model_endpoint_refresh_columns():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -876,9 +917,13 @@ def _migrate_add_model_endpoint_refresh_columns():
if columns and "model_refresh_timeout" not in columns:
conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_refresh_timeout INTEGER")
conn.commit()
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"model_endpoints refresh-policy migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_task_run_model_column():
"""Add model column to task_runs if it doesn't exist (records which model ran)."""
@@ -886,6 +931,7 @@ def _migrate_add_task_run_model_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(task_runs)")
@@ -894,9 +940,13 @@ def _migrate_add_task_run_model_column():
conn.execute("ALTER TABLE task_runs ADD COLUMN model TEXT")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'model' column to task_runs")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"task_runs model migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_supports_tools_column():
"""Add supports_tools column to model_endpoints if it doesn't exist."""
@@ -904,6 +954,7 @@ def _migrate_add_supports_tools_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -912,9 +963,13 @@ def _migrate_add_supports_tools_column():
conn.execute("ALTER TABLE model_endpoints ADD COLUMN supports_tools BOOLEAN")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'supports_tools' column to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"supports_tools migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_cached_models_column():
@@ -923,6 +978,7 @@ def _migrate_add_cached_models_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -930,9 +986,13 @@ def _migrate_add_cached_models_column():
if columns and "cached_models" not in columns:
conn.execute("ALTER TABLE model_endpoints ADD COLUMN cached_models TEXT")
conn.commit()
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"cached_models migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_pinned_models_column():
"""Add pinned_models column to model_endpoints if it doesn't exist."""
@@ -940,6 +1000,7 @@ def _migrate_add_pinned_models_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
@@ -948,9 +1009,13 @@ def _migrate_add_pinned_models_column():
conn.execute("ALTER TABLE model_endpoints ADD COLUMN pinned_models TEXT")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'pinned_models' column to model_endpoints")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"pinned_models migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_notes_sort_order():
"""Add sort_order, image_url, repeat columns to notes if they don't exist."""
@@ -958,6 +1023,7 @@ def _migrate_add_notes_sort_order():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(notes)")
@@ -975,9 +1041,13 @@ def _migrate_add_notes_sort_order():
if columns and "agent_session_id" not in columns:
conn.execute("ALTER TABLE notes ADD COLUMN agent_session_id TEXT")
conn.commit()
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"notes migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_mode_column():
"""Add mode column to sessions table if it doesn't exist."""
@@ -985,6 +1055,7 @@ def _migrate_add_mode_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -993,9 +1064,13 @@ def _migrate_add_mode_column():
conn.execute("ALTER TABLE sessions ADD COLUMN mode TEXT")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'mode' column to sessions")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"Migration check for mode failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_folder_column():
"""Add folder column to sessions table if it doesn't exist."""
@@ -1003,6 +1078,7 @@ def _migrate_add_folder_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -1011,9 +1087,13 @@ def _migrate_add_folder_column():
conn.execute("ALTER TABLE sessions ADD COLUMN folder TEXT")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'folder' column to sessions")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"Migration check for folder failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_token_columns():
"""Add cumulative token tracking columns to sessions table."""
@@ -1021,6 +1101,7 @@ def _migrate_add_token_columns():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(sessions)")
@@ -1030,9 +1111,13 @@ def _migrate_add_token_columns():
conn.execute("ALTER TABLE sessions ADD COLUMN total_output_tokens INTEGER DEFAULT 0")
conn.commit()
logging.getLogger(__name__).info("Migrated: added token tracking columns to sessions")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"Migration check for token columns failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_owner_to_table(table_name: str, index_name: str):
"""Generic helper: add owner TEXT column + index to a table if missing."""
@@ -1040,6 +1125,7 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute(f"PRAGMA table_info({table_name})")
@@ -1049,9 +1135,13 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name}(owner)")
conn.commit()
logging.getLogger(__name__).info(f"Migrated: added 'owner' column to {table_name}")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"Migration owner column for {table_name} failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_multiuser_owner_columns():
"""Add owner column to memories, gallery_images, user_tools, comparisons."""
@@ -1076,6 +1166,7 @@ def _migrate_add_api_token_scopes_column():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
columns = [row[1] for row in conn.execute("PRAGMA table_info(api_tokens)").fetchall()]
@@ -1084,9 +1175,13 @@ def _migrate_add_api_token_scopes_column():
conn.execute("UPDATE api_tokens SET scopes = 'chat' WHERE scopes IS NULL OR scopes = ''")
conn.commit()
logging.getLogger(__name__).info("Migrated: added scopes column to api_tokens")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"api_tokens.scopes migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_assign_legacy_owner():
"""Assign all null-owner data to the first (admin) user.
@@ -1128,6 +1223,7 @@ def _migrate_assign_legacy_owner():
return
logger = logging.getLogger(__name__)
conn = None
try:
conn = sqlite3.connect(db_path)
# Every table with an `owner` column. New tables added later will be
@@ -1152,9 +1248,13 @@ def _migrate_assign_legacy_owner():
except Exception as e:
logger.warning(f"Legacy owner assignment for {table} failed: {e}")
conn.commit()
conn.close()
except Exception as e:
logger.warning(f"Legacy owner migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
# Also migrate memory.json
mem_path = MEMORY_FILE
@@ -1502,6 +1602,7 @@ class CalendarCal(TimestampMixin, Base):
# NULL for local calendars and for CalDAV calendars created before
# multi-account support was added (treated as "use any configured account").
account_id = Column(String, nullable=True, index=True)
caldav_base_url = Column(String, nullable=True)
events = relationship("CalendarEvent", back_populates="calendar", cascade="all, delete-orphan")
@@ -1532,10 +1633,27 @@ class CalendarEvent(TimestampMixin, Base):
# vanishes upstream). NULL/local = created locally (agent, email triage, or
# a UI event whose write-back failed) and must NOT be pruned by the sync.
origin = Column(String, nullable=True, index=True)
remote_href = Column(String, nullable=True) # CalDAV object URL for updates/deletes
remote_etag = Column(String, nullable=True) # Last seen CalDAV ETag, when available
caldav_sync_pending = Column(String, nullable=True) # create | update | delete retry marker
calendar = relationship("CalendarCal", back_populates="events")
class CalendarDeletedEvent(TimestampMixin, Base):
"""Hidden CalDAV delete tombstone retained until remote delete succeeds."""
__tablename__ = "caldav_deleted_events"
uid = Column(String, primary_key=True, index=True)
owner = Column(String, nullable=True, index=True)
calendar_id = Column(String, nullable=True, index=True)
remote_href = Column(String, nullable=True)
remote_etag = Column(String, nullable=True)
caldav_base_url = Column(String, nullable=True)
summary = Column(String, nullable=True)
last_error = Column(Text, nullable=True)
class Integration(TimestampMixin, Base):
"""An external service connection (email, RSS, webhook, etc.)."""
__tablename__ = "integrations"
@@ -1667,6 +1785,7 @@ def init_db():
_migrate_add_calendar_is_utc()
_migrate_add_calendar_origin()
_migrate_add_calendar_account_id()
_migrate_add_caldav_sync_columns()
_migrate_chat_messages_fts()
_migrate_encrypt_email_passwords()
_migrate_encrypt_signatures()
@@ -1773,6 +1892,7 @@ def _migrate_add_email_smtp_security():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(email_accounts)")
@@ -1788,9 +1908,13 @@ def _migrate_add_email_smtp_security():
)
conn.commit()
logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_encrypt_endpoint_keys():
@@ -1891,6 +2015,7 @@ def _migrate_add_calendar_is_utc():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1899,9 +2024,13 @@ def _migrate_add_calendar_is_utc():
conn.execute("ALTER TABLE calendar_events ADD COLUMN is_utc BOOLEAN DEFAULT 0 NOT NULL")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'is_utc' column to calendar_events")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"is_utc migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_calendar_origin():
@@ -1912,6 +2041,7 @@ def _migrate_add_calendar_origin():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1921,9 +2051,13 @@ def _migrate_add_calendar_origin():
conn.execute("CREATE INDEX IF NOT EXISTS ix_calendar_events_origin ON calendar_events(origin)")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'origin' column to calendar_events")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"calendar_events.origin migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_calendar_account_id():
@@ -1933,6 +2067,7 @@ def _migrate_add_calendar_account_id():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(calendars)")
@@ -1942,9 +2077,38 @@ def _migrate_add_calendar_account_id():
conn.execute("CREATE INDEX IF NOT EXISTS ix_calendars_account_id ON calendars(account_id)")
conn.commit()
logging.getLogger(__name__).info("Migrated: added 'account_id' column to calendars")
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"calendars.account_id migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def _migrate_add_caldav_sync_columns():
"""Add remote CalDAV metadata used for bidirectional sync."""
import sqlite3
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
try:
conn = sqlite3.connect(db_path)
ev_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendar_events)").fetchall()]
if ev_columns and "remote_href" not in ev_columns:
conn.execute("ALTER TABLE calendar_events ADD COLUMN remote_href TEXT")
if ev_columns and "remote_etag" not in ev_columns:
conn.execute("ALTER TABLE calendar_events ADD COLUMN remote_etag TEXT")
if ev_columns and "caldav_sync_pending" not in ev_columns:
conn.execute("ALTER TABLE calendar_events ADD COLUMN caldav_sync_pending TEXT")
cal_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendars)").fetchall()]
if cal_columns and "caldav_base_url" not in cal_columns:
conn.execute("ALTER TABLE calendars ADD COLUMN caldav_base_url TEXT")
conn.commit()
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"CalDAV sync metadata migration failed: {e}")
def _migrate_add_calendar_metadata():
@@ -1953,6 +2117,7 @@ def _migrate_add_calendar_metadata():
db_path = DATABASE_URL.replace("sqlite:///", "")
if not os.path.exists(db_path):
return
conn = None
try:
conn = sqlite3.connect(db_path)
cursor = conn.execute("PRAGMA table_info(calendar_events)")
@@ -1964,9 +2129,13 @@ def _migrate_add_calendar_metadata():
if columns and "last_pinged" not in columns:
conn.execute("ALTER TABLE calendar_events ADD COLUMN last_pinged DATETIME")
conn.commit()
conn.close()
except Exception as e:
logging.getLogger(__name__).warning(f"calendar_events migration failed: {e}")
finally:
try:
conn.close()
except Exception:
pass
def get_db():
"""
+48 -13
View File
@@ -11,14 +11,24 @@ from typing import Dict, List, Any, Optional, TYPE_CHECKING
if TYPE_CHECKING:
from .session_manager import SessionManager
# Module-level session manager reference (set at app startup)
_session_manager: Optional["SessionManager"] = None
# Module-level session manager singleton (single source of truth)
_SESSION_MANAGER_INSTANCE: Optional["SessionManager"] = None
def set_session_manager(manager: "SessionManager"):
"""Set the global session manager reference."""
global _session_manager
_session_manager = manager
def set_session_manager_instance(manager: "SessionManager"):
"""Set the global SessionManager singleton."""
global _SESSION_MANAGER_INSTANCE
_SESSION_MANAGER_INSTANCE = manager
def get_session_manager_instance() -> Optional["SessionManager"]:
"""Get the global SessionManager singleton."""
return _SESSION_MANAGER_INSTANCE
# Keep legacy name for backward compatibility
set_session_manager = set_session_manager_instance
get_session_manager = get_session_manager_instance
@dataclass
@@ -42,7 +52,17 @@ class ChatMessage:
@dataclass
class Session:
"""A chat session — pure data container."""
"""A chat session — pure data container.
``.history`` is the authoritative mutable message list. Callers may
read, append, pop, or reassign it directly — these changes take
effect immediately. ``_history`` remains a compatibility alias that
always resolves to the authoritative ``history`` list.
Each session gets its own unique history list at construction time
(the dataclass default is never shared between instances).
"""
id: str
name: str
endpoint_url: str
@@ -56,24 +76,35 @@ class Session:
message_count: int = 0
def __post_init__(self):
if self.history is None:
self.history = []
if self.headers is None:
self.headers = {}
# Ensure each session gets its OWN list (not the shared dataclass default)
if self.history is None:
self.history = []
@property
def _history(self) -> List[ChatMessage]:
"""Compatibility alias for callers that still reference ``_history``."""
return self.history
@_history.setter
def _history(self, messages: List[ChatMessage]):
self.history = messages
def add_message(self, message: ChatMessage):
"""
Add a message to this session.
Delegates to SessionManager for persistence if available,
otherwise just appends to history.
Appends to the authoritative history list and increments
message_count. Delegates to SessionManager for persistence
if available.
"""
self.history.append(message)
self.message_count = len(self.history)
# Delegate to session manager for persistence
if _session_manager:
_session_manager._persist_message(self.id, message)
if _SESSION_MANAGER_INSTANCE:
_SESSION_MANAGER_INSTANCE._persist_message(self.id, message)
def get_context_messages(self) -> List[Dict[str, Any]]:
"""Get messages in format for LLM API.
@@ -94,3 +125,7 @@ class Session:
def get(self, key: str, default=None):
"""Dict-like access for compatibility."""
return getattr(self, key, default)
def __getitem__(self, key: str):
"""Allow session['field'] syntax."""
return getattr(self, key)
+7 -1
View File
@@ -191,6 +191,8 @@ def _windows_bash_fallbacks() -> List[str]:
base = os.environ.get(env_name)
if base:
roots.append(ntpath.join(base, "Git"))
if env_name == "LocalAppData":
roots.append(ntpath.join(base, "Programs", "Git"))
roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)
paths: List[str] = []
@@ -298,7 +300,7 @@ def is_wsl() -> bool:
import sys
if sys.platform.startswith("linux") or os.name == "posix":
try:
with open("/proc/version", "r") as f:
with open("/proc/version", "r", encoding="utf-8", errors="ignore") as f:
if "microsoft" in f.read().lower():
return True
except Exception:
@@ -366,6 +368,10 @@ def _ssh_exec_argv(
strict_host_key_checking: bool | None = None,
) -> list[str]:
"""Build a consistent ssh argv for remote command execution."""
remote_value = str(remote or "").strip()
remote_host = remote_value.rsplit("@", 1)[-1]
if not remote_value or remote_value.startswith("-") or not remote_host or remote_host.startswith("-"):
raise ValueError("Invalid SSH remote host")
argv = ["ssh"]
if connect_timeout is not None:
argv.extend(["-o", f"ConnectTimeout={int(connect_timeout)}"])
+45 -4
View File
@@ -17,6 +17,9 @@ from typing import Dict, Optional
from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal, utcnow_naive
from .models import Session, ChatMessage
# Re-export singleton accessors from models for convenience
from .models import set_session_manager_instance, get_session_manager_instance
logger = logging.getLogger(__name__)
@@ -188,12 +191,17 @@ class SessionManager:
"""
Add a message to a session and persist to database.
Updates the authoritative history list and persists through this
manager directly so tests and temporary managers do not depend on the
process-wide session-manager singleton.
Args:
session_id: Session ID
message: ChatMessage to add
"""
session = self.get_session(session_id)
session.history.append(message)
session._history = session.history
session.message_count = len(session.history)
self._persist_message(session_id, message)
@@ -232,7 +240,10 @@ class SessionManager:
)
db.add(db_message)
db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
if session_id in self.sessions:
db_session.message_count = len(self.sessions[session_id].history)
else:
db_session.message_count = 0
_now = datetime.now(timezone.utc)
db_session.last_accessed = _now
# Clean "last conversation" timestamp — only bumped here on a
@@ -283,6 +294,7 @@ class SessionManager:
# Update in-memory
session.history = session.history[:keep_count]
session._history = session.history
logger.info(f"Truncated session {session_id} to {keep_count} messages")
return True
@@ -333,6 +345,7 @@ class SessionManager:
db.commit()
session.history = list(messages)
session._history = session.history
session.message_count = len(messages)
logger.info("Replaced session %s history with %d messages", session_id, len(messages))
return True
@@ -608,24 +621,52 @@ class SessionManager:
def save_sessions(self):
"""No-op for DB compatibility."""
def ensure_task_session(self, session_id: str, name: str, endpoint_url: str, model: str, owner: str = None, task: object = None) -> Session:
"""Create a task session if it doesn't exist, or return the existing one.
Unlike create_session, this checks the cache first and does NOT
overwrite an existing in-memory session. The task scheduler must
use this instead of direct dict assignment.
"""
if session_id in self.sessions:
return self.sessions[session_id]
session = self.create_session(session_id, name, endpoint_url, model, owner=owner)
if task is not None:
task.session_id = session_id
return session
# ------------------------------------------------------------------
# Cleanup
# ------------------------------------------------------------------
def cleanup_empty_sessions(self, auto_archive_days: int = 30) -> dict:
"""Clean up empty and old sessions."""
def cleanup_empty_sessions(self, auto_archive_days: int = 30, min_age_hours: int = 1) -> dict:
"""Clean up empty and old sessions.
Args:
auto_archive_days: Age in days before non-important sessions are archived.
min_age_hours: Minimum age in hours before an empty session can be deleted.
Prevents deleting sessions that were just created.
"""
db = SessionLocal()
stats = {'deleted_empty': 0, 'archived_old': 0, 'total_checked': 0}
try:
all_sessions = db.query(DbSession).all()
cutoff_date = utcnow_naive() - timedelta(days=auto_archive_days)
min_age = utcnow_naive() - timedelta(hours=min_age_hours)
for db_session in all_sessions:
stats['total_checked'] += 1
# Delete empty sessions
# Delete empty sessions only if older than min_age_hours
if db_session.message_count == 0:
if db_session.created_at is not None:
created = db_session.created_at
if created.tzinfo is None:
created = created.replace(tzinfo=timezone.utc)
if created > min_age:
continue # Too young to delete
if db_session.id in self.sessions:
del self.sessions[db_session.id]
db.delete(db_session)
+5 -5
View File
@@ -16,18 +16,18 @@ services:
ports:
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
volumes:
- ./data:/app/data:z
- ./logs:/app/logs:z
- ${APP_DATA_DIR:-./data}:/app/data:z
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
# add the shown public key to each remote server's authorized_keys.
- ./data/ssh:/app/.ssh:z
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
# container, so persist its HuggingFace cache under ./data/huggingface.
- ./data/huggingface:/app/.cache/huggingface:z
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ./data/local:/app/.local:z
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
+5 -5
View File
@@ -15,18 +15,18 @@ services:
ports:
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
volumes:
- ./data:/app/data:z
- ./logs:/app/logs:z
- ${APP_DATA_DIR:-./data}:/app/data:z
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
# add the shown public key to each remote server's authorized_keys.
- ./data/ssh:/app/.ssh:z
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
# container, so persist its HuggingFace cache under ./data/huggingface.
- ./data/huggingface:/app/.cache/huggingface:z
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ./data/local:/app/.local:z
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
+5 -5
View File
@@ -4,18 +4,18 @@ services:
ports:
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
volumes:
- ./data:/app/data:z
- ./logs:/app/logs:z
- ${APP_DATA_DIR:-./data}:/app/data:z
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
# add the shown public key to each remote server's authorized_keys.
- ./data/ssh:/app/.ssh:z
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
# container, so persist its HuggingFace cache under ./data/huggingface.
- ./data/huggingface:/app/.cache/huggingface:z
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ./data/local:/app/.local:z
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
+194
View File
@@ -0,0 +1,194 @@
# Agent migration manifests
Odysseus should be able to learn from another agent without blindly trusting
that agent's whole state. The safe migration path is:
```text
source agent export -> source adapter -> agent-migration.v1 manifest -> preview -> apply
```
The manifest is intentionally source-neutral. OpenClaw, Hermes, a folder of
Markdown notes, or any other agent can have its own adapter, but Odysseus only
needs to understand the normalized manifest.
## Why not import everything as memory?
Durable memory should stay compact and useful. Long notes, logs, session
transcripts, and project archives are useful context, but they are not all
memories. A good migration keeps two layers separate:
- **Archive documents** preserve source material for search, reading, and later
extraction.
- **Memory candidates** are short facts or preferences that can be reviewed
before being saved into Odysseus memory.
This keeps Odysseus' existing memory-review flow intact while giving it better
source material to review.
## Manifest shape
`agent-migration.v1` is a JSON object:
```json
{
"schema_version": "agent-migration.v1",
"generated_at": "2026-06-06T00:00:00Z",
"source": {
"name": "example-agent",
"kind": "generic"
},
"summary": {
"item_count": 3,
"counts_by_kind": {
"memory": 1,
"skill": 1,
"conversation_thread": 1,
"archive_document": 1
},
"warning_count": 0
},
"items": [],
"warnings": []
}
```
Each item has a stable `id`, a `kind`, source metadata, and enough content for a
future importer to preview it before applying.
Supported item kinds in the first pass:
- `memory` — a candidate memory with `text`, `category`, `source`, and
provenance metadata.
- `skill` — a `SKILL.md` file with content and parsed frontmatter metadata.
- `conversation_thread` — a normalized transcript thread from an exported chat
history. Message content is optional; adapters can preserve only thread
metadata, message counts, timestamps, and hashes when a manifest should stay
small or avoid embedding private transcript text.
- `archive_document` — long-form source material. Content is optional; adapters
can preserve only path/hash/size metadata when a manifest should stay small.
## Build a manifest
Use the read-only helper:
```bash
python3 scripts/agent_migration_manifest.py \
--source-name old-agent \
--source-kind generic \
--memory-json /path/to/memories.json \
--skills-dir /path/to/skills \
--conversation-json /path/to/conversations.json \
--archive /path/to/notes \
--output /tmp/agent-migration.json
```
The helper does not write to `data/`, call an LLM, import Odysseus modules, or
modify the source. It only writes JSON.
Memory JSON may be:
```json
[
"A plain memory string",
{
"text": "A categorized memory",
"category": "preference",
"source": "old-agent"
}
]
```
or an object containing a list under `memories`, `memory`, `items`, or `data`.
Skills are scanned recursively for `SKILL.md`:
```bash
python3 scripts/agent_migration_manifest.py \
--source-name hermes \
--source-kind hermes \
--skills-dir ~/.hermes/skills \
--output /tmp/hermes-skills-manifest.json
```
Archive documents are metadata-only by default. To embed text content:
```bash
python3 scripts/agent_migration_manifest.py \
--source-name notes-export \
--archive /path/to/markdown-notes \
--include-archive-content \
--output /tmp/notes-manifest.json
```
Conversation exports are also metadata-only by default:
```bash
python3 scripts/agent_migration_manifest.py \
--source-name chatgpt-export \
--source-kind chatgpt \
--conversation-json /path/to/conversations.json \
--output /tmp/chatgpt-conversations-manifest.json
```
The first pass supports generic conversation JSON such as:
```json
[
{
"id": "thread-1",
"title": "Project plan",
"messages": [
{"role": "user", "content": "Can we design this?"},
{"role": "assistant", "content": "Yes, start with a narrow slice."}
]
}
]
```
It also recognizes ChatGPT-style `mapping` exports from `conversations.json`.
To embed normalized messages:
```bash
python3 scripts/agent_migration_manifest.py \
--source-name chatgpt-export \
--source-kind chatgpt \
--conversation-json /path/to/conversations.json \
--include-conversation-content \
--max-conversation-messages 2000 \
--output /tmp/chatgpt-conversations-with-content.json
```
Content embedding is explicit because exported chat histories can be huge and
private. A future source-specific adapter can add ZIP traversal, attachment
metadata, and provider-specific project/workspace fields while still emitting
the same `conversation_thread` manifest item.
## Recommended apply behavior
A future Odysseus importer should treat the manifest as untrusted user-provided
data and apply it in stages:
1. Show a dry-run summary with counts, warnings, duplicates, and sample items.
2. Back up current `data/` state before writing anything.
3. Import archive documents as documents or another searchable source, not as
memory.
4. Import conversation threads as searchable archived context first, with
citations back to the source thread. Do not turn whole transcripts into
memory.
5. Show memory candidates for review before saving through the normal memory
path.
6. Import skills only after name/category conflict checks.
7. Skip secrets by default. Credentials need explicit, provider-specific flows.
## What belongs in source adapters?
Adapters can be source-specific. The core manifest should not be.
For example, an OpenClaw adapter may know about OpenClaw's workspace files. A
Hermes adapter may know about `~/.hermes/config.yaml` and `~/.hermes/skills`.
A ChatGPT adapter may know about `conversations.json`, uploaded-file metadata,
and image attachment directories. A Claude adapter may know about Claude's
export shape and project boundaries. A generic adapter may only know about
memory JSON, conversation JSON, `SKILL.md`, and Markdown folders.
Nonstandard folders should be adapter details, not required Odysseus concepts.
+129
View File
@@ -0,0 +1,129 @@
# Backup & Restore
Odysseus keeps all of your state in the `data/` directory — the SQLite database
(`app.db`), the Fernet encryption key (`data/.app_key`), the vault, memory, RAG
indexes, personal documents, and uploads. The `scripts/odysseus-backup` tool
snapshots that directory into a single gzip tarball and restores it later.
Snapshots are safe to take while the app is running: SQLite databases are copied
through SQLite's own `.backup` API rather than a raw file copy, so an in-flight
write can't corrupt the snapshot.
> **A snapshot contains your secrets.** The tarball includes the Fernet
> encryption key (`data/.app_key`), the vault, sessions, and any stored
> provider/API tokens — so treat it like a password. Store backups somewhere
> private, never commit them to Git, and prefer an encrypted destination when
> copying them offsite.
## Quick start
Run the tool from the repository root:
```bash
# Create a snapshot → backups/odysseus-backup-<YYYYMMDD-HHMMSS>.tar.gz
./scripts/odysseus-backup snapshot
# List existing snapshots (most recent first)
./scripts/odysseus-backup list
# Check a tarball's integrity without extracting it
./scripts/odysseus-backup verify backups/odysseus-backup-20260101-120000.tar.gz
# Restore (destructive — see the warning below)
./scripts/odysseus-backup restore backups/odysseus-backup-20260101-120000.tar.gz --yes
```
The script depends only on the Python standard library, so any `python3` on your
`PATH` will run it — you don't need the app's virtualenv active.
Every command prints a JSON result. Add `--pretty` for indented output.
## Commands
### `snapshot`
Writes a `tar.gz` of `data/` to `backups/<timestamp>.tar.gz`.
| Flag | Effect |
| --- | --- |
| `--out PATH` | Write to a specific path instead of the default `backups/` location. Must be **outside** `data/`. |
| `--include-research` | Include `data/deep_research/` (skipped by default — research runs are large). |
| `--include-attachments` | Include `data/mail-attachments/` (skipped by default — cached IMAP extractions, re-derivable). |
By default the snapshot includes everything under `data/` **except**
`deep_research/` and `mail-attachments/`. Personal uploads and documents are
included.
```bash
# Snapshot straight to a mounted NAS path
./scripts/odysseus-backup snapshot --out /mnt/nas/odysseus-$(date +%F).tar.gz
# Full snapshot including research runs and mail attachments
./scripts/odysseus-backup snapshot --include-research --include-attachments
```
### `list`
Lists the tarballs in `backups/`, most recent first, with size and modification
time.
### `verify PATH`
Opens the tarball read-only and walks every member to confirm it is intact and
safe to restore. Nothing is extracted. Use this before relying on an old backup
or after copying one across machines.
### `restore PATH --yes`
Overwrites `data/` from a tarball.
> **Restore is destructive.** It replaces the current `data/` directory. `--yes`
> is required so a mistyped command can't wipe your live state.
Restore is not a blind delete: before extracting, the tool **renames your current
`data/` to `data.before-restore-<timestamp>`** in the repository root. If a
restore turns out to be wrong, your previous state is still there — delete the
restored `data/` and rename the stashed directory back. The restore path is also
validated entry-by-entry: archives containing absolute paths, `..` segments,
symlinks, or anything outside `data/` are rejected.
## Scheduling offsite backups
The tarball output composes cleanly with cron and any copy tool. For example, a
nightly snapshot copied offsite:
```cron
0 3 * * * cd /path/to/odysseus && ./scripts/odysseus-backup snapshot --out "/mnt/nas/odysseus-$(date +\%F).tar.gz"
```
Swap the `--out` target for `scp`, `rclone`, `s3cmd`, or similar to push the
snapshot to remote storage.
## Docker vs native installs
The tool reads `data/` and writes `backups/` relative to the repository root, so
where you run it matters:
- **Native installs** — run it from the repo root as shown above. `data/` and
`backups/` are both in the repo directory.
- **Docker**`docker-compose.yml` bind-mounts the host's `./data` to
`/app/data`, so the live data is also present on the host. **Run the tool on
the host** from the repo root; the snapshot reads the bind-mounted `./data` and
writes to `./backups` on the host. Running it *inside* the container is not
recommended, because `backups/` is not a mounted volume and the tarball would
be lost when the container is recreated.
> **ChromaDB caveat (Docker only).** In the Docker setup, ChromaDB stores its
> vectors in a separate Compose-managed volume (declared as `chromadb-data`),
> **not** under `./data`. `odysseus-backup` therefore does not capture the Docker
> ChromaDB store. Back it up separately if you need it. Compose prefixes the
> volume with the project name, so find the real name first
> (`docker volume ls | grep chromadb`), then archive it — for example:
>
> ```bash
> docker run --rm -v <project>_chromadb-data:/data -v "$PWD":/backup \
> alpine tar czf /backup/chromadb.tar.gz -C /data .
> ```
>
> On native installs ChromaDB lives at `data/chroma/` and is included in the
> snapshot normally.
+10 -3
View File
@@ -25,9 +25,16 @@
--radius: 8px;
}
* { box-sizing: border-box; }
html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; }
/* Each section is a full-viewport "page" with its content centered, so only
one shows at a time and the snap is obvious. */
html { scroll-behavior: smooth; scroll-padding-top: 60px; }
/* REMOVED: "scroll-snap-type: y proximity"
The idea was: >>Each section is a full-viewport "page" with its content centered,
so only one shows at a time and the snap is obvious.<<
PROBLEM: sections easily grow taller than 100vh IRL
This cause forced jumps mid-read. It's intrusive UX.
The landing-page is not a PowerPoint presentation!
Preserved: CSS snap-points to avoid destroying code meta-data*/
.hero, section {
scroll-snap-align: start; min-height: 100vh;
display: flex; flex-direction: column; justify-content: center;
+102
View File
@@ -0,0 +1,102 @@
# Security CI guide
This project runs a set of automated security checks on every pull request and
on every push to `main`. This page explains what each one does, whether it can
block a merge, and the few one-time settings you should turn on to get the full
benefit.
## What runs, and why
Each check lives in its own file under `.github/workflows/`. They run
automatically; you do not start them.
| Check | What it protects against | Blocks a merge? |
|---|---|---|
| **Secret scan** (gitleaks) | An API key, token, or password being committed by mistake or on purpose | Yes |
| **Workflow security** (actionlint + zizmor) | A broken or insecure automation file that could leak the repo's access token | Yes |
| **Dependency review** | A pull request that adds a software library with a known security hole | Yes |
| **pip-audit** | Known security holes in the Python libraries already used | No (advisory) |
| **Container scan: hadolint** | Mistakes and insecure patterns in the `Dockerfile` | Yes |
| **Container scan: Trivy** | Known security holes in the Docker image | No (advisory) |
| **CodeQL** | Real bugs in the app's own code: injection, auth mistakes, path traversal | No (advisory) |
"Blocks a merge" means a red X appears on the pull request and, once you enable
the setting below, the **Merge** button is disabled until it is fixed.
"Advisory" means it reports problems into the repository's **Security** tab so
you can review them on your own schedule, but it never stops a merge. These are
advisory on purpose: they often flag long-standing issues in other people's
libraries, not something a given pull request introduced.
## Where results appear
- **Checks tab of a pull request**: the pass/fail of each check. A green tick is
good; a red X needs attention.
- **Security tab of the repository**: detailed findings from the advisory
scanners (Trivy and CodeQL). This is your dashboard.
## If a check fails
- **Secret scan failed**: a real credential may have been committed. Treat it as
leaked: rotate (regenerate) that key or token immediately, then remove it from
the file. Do not just delete the commit; assume it was seen.
- **Dependency review failed**: the pull request adds a library with a known
vulnerability. Ask the contributor to use a patched version, or decline the
change.
- **hadolint / workflow security failed**: the contributor changed the
`Dockerfile` or an automation file in a way the linter rejects. Ask them to
address the message shown in the failed check.
## One-time settings to turn on
These two settings unlock the full value. You only do them once.
### 1. Require the blocking checks before merging
This makes the **Merge** button refuse to work until the gating checks pass.
1. Go to the repository on GitHub.
2. Click **Settings** (top right of the repo).
3. In the left sidebar, click **Branches**.
4. Under **Branch protection rules**, click **Add branch ruleset** (or **Add
rule**), and set the branch name pattern to `dev` (this is the branch all
pull requests target; `main` is fast-forwarded at releases).
5. Enable **Require status checks to pass before merging**.
6. In the search box that appears, add these checks by name:
- `Python syntax (compileall)`
- `JS syntax (node --check)`
- `gitleaks`
- `actionlint`
- `zizmor (Actions SAST)`
- `hadolint (Dockerfile lint)`
- `dependency-review (PR gate)`
The first two come from the correctness CI (`ci.yml`); the rest are this
security suite. Leave pytest, pip-audit, Trivy, and CodeQL unchecked so they
stay advisory.
7. Also enable **Require a pull request before merging** and **Require review
from Code Owners** (this uses the `.github/CODEOWNERS` file so every change
needs your sign-off).
8. Click **Create** / **Save changes**.
Note: a check name only appears in the list after it has run at least once, so
let the workflows run on one pull request first, then add them here.
### 2. Turn on the Security tab features
1. **Settings -> Code security** (or **Code security and analysis**).
2. Turn on **Dependency graph** (usually on by default for public repos) -- this
powers Dependency review and Dependabot.
3. Turn on **Dependabot alerts** and **Dependabot security updates**.
4. Under **Code scanning**, you have two ways to scan the app code with CodeQL:
- The included `codeql.yml` workflow already scans `main` and runs weekly.
- To also scan **pull requests** (recommended, since most contributions come
from forks), click **Set up -> Default** under Code scanning. GitHub then
runs CodeQL on pull requests for you, with no token limitations.
## Keeping it current
`.github/dependabot.yml` opens small weekly pull requests to update Python and
npm packages, the Docker base image, and the pinned automation actions
themselves. Review and merge those like any other pull request; they keep the
project patched without manual tracking.
+28 -3
View File
@@ -30,14 +30,26 @@ function Fail($msg) {
exit 1
}
function Test-WindowsBashStub($path) {
if (-not $path) { return $false }
$lowered = $path.ToLowerInvariant()
foreach ($stub in @("system32\bash.exe", "sysnative\bash.exe", "windowsapps\bash.exe")) {
if ($lowered.Contains($stub)) { return $true }
}
return $false
}
function Find-GitBash {
$cmd = Get-Command bash -ErrorAction SilentlyContinue
if ($cmd) { return $cmd.Source }
if ($cmd -and -not (Test-WindowsBashStub $cmd.Source)) { return $cmd.Source }
$roots = @()
foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
$base = [Environment]::GetEnvironmentVariable($name)
if ($base) { $roots += (Join-Path $base "Git") }
if ($base) {
$roots += (Join-Path $base "Git")
if ($name -eq "LocalAppData") { $roots += (Join-Path $base "Programs\Git") }
}
}
$roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")
@@ -129,7 +141,20 @@ if (-not (Find-GitBash)) {
Write-Host " https://git-scm.com/download/win" -ForegroundColor Yellow
}
# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
# 6. Point CUDA_PATH at a real CUDA toolkit so GPU llama-cpp-python can import.
$cudaBase = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
if (Test-Path $cudaBase) {
$cudaBest = Get-ChildItem $cudaBase -Directory -ErrorAction SilentlyContinue |
Where-Object { Test-Path (Join-Path $_.FullName "bin") } |
Sort-Object { try { [version]($_.Name -replace "^v", "") } catch { [version]"0.0" } } -Descending |
Select-Object -First 1
if ($cudaBest) {
$env:CUDA_PATH = $cudaBest.FullName
Write-Host ("Using CUDA_PATH = " + $cudaBest.FullName) -ForegroundColor Cyan
}
}
# 7. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port)
Write-Host "Press Ctrl+C to stop."
Write-Host ""
+2 -3
View File
@@ -93,16 +93,15 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
if category_filter:
msg += f" in category '{category_filter}'"
return [TextContent(type="text", text=msg + ".")]
lines = [f"Found {len(memories)} memory entries:\n"]
for m in memories[:100]:
for m in memories:
cat = m.get("category", "fact")
mid = m.get("id", "?")[:8]
text = m.get("text", "")
if len(text) > 150:
text = text[:150] + "..."
lines.append(f"- [{cat}] `{mid}` — {text}")
if len(memories) > 100:
lines.append(f"... and {len(memories) - 100} more")
return [TextContent(type="text", text="\n".join(lines))]
elif action == "add":
+12 -9
View File
@@ -5,16 +5,16 @@
"packages": {
"": {
"dependencies": {
"@anthropic-ai/sdk": "^0.98.0"
"@anthropic-ai/sdk": "^0.104.1"
},
"devDependencies": {
"@antithesishq/bombadil": "^0.3.2"
"@antithesishq/bombadil": "^0.5.0"
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.98.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.98.0.tgz",
"integrity": "sha512-N7aXtCvC5g6T1Y4V29lJjceu/zTkVkIZF0jdBvagr0TRFHuKeImffalGWEfqZKrvjH+IQbzJWw6TmSmUzrlMgg==",
"version": "0.104.1",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
"integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
"license": "MIT",
"dependencies": {
"json-schema-to-ts": "^3.1.1",
@@ -33,11 +33,14 @@
}
},
"node_modules/@antithesishq/bombadil": {
"version": "0.3.2",
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.3.2.tgz",
"integrity": "sha512-ATy1w9ZY5gbny1H8DFc7rxZitT7DLLLFDiGcRZe+8TQiUrV5tLO+IJGOVNNLp3RpCqjZqSsxGiKoQsx31ipV1g==",
"version": "0.5.0",
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
"integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
"dev": true,
"license": "MIT"
"license": "MIT",
"bin": {
"bombadil": "bin/bombadil.js"
}
},
"node_modules/@babel/runtime": {
"version": "7.29.7",
+2 -2
View File
@@ -4,9 +4,9 @@
"url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
},
"devDependencies": {
"@antithesishq/bombadil": "^0.3.2"
"@antithesishq/bombadil": "^0.5.0"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.98.0"
"@anthropic-ai/sdk": "^0.104.1"
}
}
+4
View File
@@ -15,4 +15,8 @@ markers = [
"area_helpers: self-tests for the shared test helpers in tests/helpers/",
"area_unit: pure parser / utility tests that do not clearly belong elsewhere",
"area_uncategorized: tests not yet matched by the taxonomy (fallback)",
# Fast-lane marker (issue #3443). Opt-in and orthogonal to the area_*/sub_*
# taxonomy. The fast lane runs `not slow`; mark a test slow only with
# duration evidence (see tests/run_focus.py --durations and tests/README.md).
"slow: opt-in marker for known-slow tests; excluded by the fast lane (not slow)",
]
+2 -2
View File
@@ -15,7 +15,7 @@ faster-whisper
# DuckDuckGo as a search provider option.
# Install if you want DDG in the search-provider dropdown.
# Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
duckduckgo-search
ddgs
# PDF form-filling feature (fillable AcroForm detection, field extraction,
# value/annotation/signature stamping, page rendering for the form overlay).
@@ -33,4 +33,4 @@ PyMuPDF
# magika (onnxruntime), already a core dep via fastembed. We avoid the
# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
# the dependency-age discussion in issue #485.
markitdown[docx,pptx,xlsx,xls]==0.1.5
markitdown[docx,pptx,xlsx,xls]==0.1.6
+6 -2
View File
@@ -3,8 +3,8 @@ uvicorn
python-multipart
python-dotenv
httpx
pydantic>=2.0
pydantic-settings>=2.0
pydantic>=2.13.4
pydantic-settings>=2.14.1
SQLAlchemy
pypdf
beautifulsoup4
@@ -43,3 +43,7 @@ qrcode[pil]
croniter
pytest
pytest-asyncio
# starlette.testclient prefers httpx2 since Starlette 1.2.0 and warns on every
# TestClient import when only classic httpx is present. Runtime code keeps
# using `httpx` above; this is test-client only.
httpx2
+31
View File
@@ -0,0 +1,31 @@
import re
from fastapi import HTTPException
_REMOTE_HOST_RE = re.compile(
r"^(?:[A-Za-z0-9][A-Za-z0-9._-]*@)?[A-Za-z0-9][A-Za-z0-9._-]*$"
)
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
def validate_remote_host(v: str | None) -> str | None:
if v is None or v == "":
return None
if not _REMOTE_HOST_RE.match(v):
raise HTTPException(
400,
"Invalid remote_host — must be host or user@host, no SSH option syntax",
)
return v
def validate_ssh_port(v: str | None) -> str | None:
if v is None or v == "":
return None
if not _SSH_PORT_RE.fullmatch(str(v)):
raise HTTPException(400, "Invalid ssh_port")
port = int(v)
if port < 1 or port > 65535:
raise HTTPException(400, "Invalid ssh_port")
return str(port)
+10 -2
View File
@@ -68,6 +68,7 @@ def _normalize_scopes(scopes: str | list[str] | None = None, profile: str | None
ensure_before("calendar:write", "calendar:read")
ensure_before("memory:write", "memory:read")
ensure_before("email:draft", "email:read")
ensure_before("cookbook:launch", "cookbook:read")
return normalized or [DEFAULT_SCOPES]
@@ -154,6 +155,7 @@ def setup_api_token_routes() -> APIRouter:
@router.patch("/tokens/{token_id}")
async def update_token(request: Request, token_id: str):
require_admin(request)
current_user = get_current_user(request)
try:
payload = await request.json()
except Exception:
@@ -162,6 +164,8 @@ def setup_api_token_routes() -> APIRouter:
token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
if not token:
raise HTTPException(404, "Token not found")
if current_user and token.owner != current_user:
raise HTTPException(403, "Not your token")
if isinstance(payload.get("name"), str) and payload["name"].strip():
token.name = payload["name"].strip()[:MAX_NAME_LEN]
# Only touch scopes when the caller actually sent them. A partial
@@ -189,10 +193,14 @@ def setup_api_token_routes() -> APIRouter:
@router.delete("/tokens/{token_id}")
def delete_token(request: Request, token_id: str):
require_admin(request)
current_user = get_current_user(request)
with get_db_session() as db:
deleted = db.query(ApiToken).filter(ApiToken.id == token_id).delete()
if not deleted:
token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
if not token:
raise HTTPException(404, "Token not found")
if current_user and token.owner != current_user:
raise HTTPException(403, "Not your token")
db.delete(token)
_invalidate_cache(request)
return {"status": "deleted"}
+193 -13
View File
@@ -7,7 +7,13 @@ import asyncio
import logging
import os
from core.auth import AuthManager
import json
import re
from pathlib import Path
from core.atomic_io import atomic_write_json, atomic_write_text
from core.auth import AuthManager, SetAdminResult
from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
from src.rate_limiter import RateLimiter
from src.settings_scrub import scrub_settings
from src.settings import (
@@ -67,6 +73,11 @@ class DeleteUserRequest(BaseModel):
class RenameUserRequest(BaseModel):
username: str
class SetAdminRequest(BaseModel):
is_admin: bool
class SetOpenRegistrationRequest(BaseModel):
enabled: bool
@@ -291,9 +302,30 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
if new_username in auth_manager.users:
raise HTTPException(409, "Username already taken")
# Gate on auth first. Every mutation below is contingent on this
# succeeding — doing it last meant a rejected rename (e.g. reserved
# username) left file-backed owner fields already rewritten with no
# way to roll them back.
ok = auth_manager.rename_user(old_username, new_username, user)
if not ok:
raise HTTPException(400, "Cannot rename user")
def _rollback_auth_rename() -> bool:
# On self-rename the admin session has already moved to the new
# username, so the rollback must authenticate as the new user.
rollback_user = new_username if user == old_username else user
try:
return bool(auth_manager.rename_user(new_username, old_username, rollback_user))
except Exception as rollback_err:
logger.error(
"Failed to roll back auth rename %s -> %s after owner migration failure: %s",
new_username, old_username, rollback_err,
)
return False
# Usernames are ownership keys for user data. Rename the common
# owner-scoped DB rows before changing auth so the account keeps
# access to its sessions, docs, email accounts, tasks, etc.
# owner-scoped DB rows so the account keeps access to its sessions,
# docs, email accounts, tasks, etc.
try:
from sqlalchemy import func
from core.database import Base, SessionLocal
@@ -316,6 +348,11 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
db.close()
except Exception as e:
logger.error("Failed to rename owner references %s -> %s: %s", old_username, new_username, e)
if not _rollback_auth_rename():
logger.error(
"Auth rename %s -> %s could not be rolled back after owner migration failure",
old_username, new_username,
)
raise HTTPException(500, "Failed to rename user data")
# Per-user prefs are JSON-backed, not SQL-backed.
@@ -335,9 +372,116 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
except Exception as e:
logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)
ok = auth_manager.rename_user(old_username, new_username, user)
if not ok:
raise HTTPException(400, "Cannot rename user")
# In-flight deep-research tasks live in the process-local
# ResearchHandler registry. They are not covered by the persisted JSON
# migration above, but the research routes filter and cancel by this
# owner field while the job is running. Do this before sweeping
# completed JSON files so a job that finishes during the rename saves
# with the new owner or is caught by the disk sweep below.
try:
rh = getattr(request.app.state, "research_handler", None)
rename_owner = getattr(rh, "rename_owner", None)
if callable(rename_owner):
rename_owner(old_username, new_username)
except Exception as e:
logger.warning("Failed to rename active research tasks %s -> %s: %s", old_username, new_username, e)
# deep_research: each completed report is a standalone JSON file with
# an `owner` field. research_routes filters by d.get("owner") == user,
# so a stale owner makes every report invisible to the renamed user.
try:
dr_dir = Path(DEEP_RESEARCH_DIR)
if dr_dir.is_dir():
for p in dr_dir.glob("*.json"):
try:
d = json.loads(p.read_text(encoding="utf-8"))
if str(d.get("owner", "")).strip().lower() == old_username:
d["owner"] = new_username
atomic_write_json(str(p), d)
except Exception as err:
logger.warning("Failed to update research owner in %s: %s", p.name, err)
except Exception as e:
logger.warning("Failed to rename research owner references %s -> %s: %s", old_username, new_username, e)
# memory.json: a flat JSON array where each entry carries an `owner`
# field. memory_manager.load(owner=user) filters on it, so stale
# entries disappear from the memory panel.
try:
if os.path.isfile(MEMORY_FILE):
with open(MEMORY_FILE, encoding="utf-8") as fh:
entries = json.loads(fh.read())
if isinstance(entries, list):
changed = False
for entry in entries:
if isinstance(entry, dict) and str(entry.get("owner", "")).strip().lower() == old_username:
entry["owner"] = new_username
changed = True
if changed:
atomic_write_json(MEMORY_FILE, entries)
except Exception as e:
logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
# uploads.json: upload rows use owner metadata for access checks and
# owner-prefixed index keys for dedupe. Rename both so attachments keep
# resolving after the account username changes.
try:
upload_handler = getattr(request.app.state, "upload_handler", None)
rename_owner = getattr(upload_handler, "rename_owner", None)
if callable(rename_owner):
rename_owner(old_username, new_username)
except Exception as e:
logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
# skills: SKILL.md frontmatter carries owner: <username>; the usage
# sidecar (_usage.json) keys entries as owner::skill-name. Both must
# be updated or the renamed user's Skills panel goes empty.
try:
skills_root = Path(SKILLS_DIR)
if skills_root.is_dir():
_owner_re = re.compile(
r'(?m)^(owner:\s*)' + re.escape(old_username) + r'\s*$',
re.IGNORECASE,
)
for p in skills_root.rglob("SKILL.md"):
try:
text = p.read_text(encoding="utf-8")
new_text = _owner_re.sub(r'\g<1>' + new_username, text)
if new_text != text:
atomic_write_text(str(p), new_text)
except Exception as err:
logger.warning("Failed to update skill owner in %s: %s", p, err)
usage_path = skills_root / "_usage.json"
if usage_path.is_file():
try:
usage = json.loads(usage_path.read_text(encoding="utf-8"))
if isinstance(usage, dict):
new_usage = {}
changed = False
for k, v in usage.items():
owner_part, sep, skill_part = k.partition("::")
if sep and owner_part.lower() == old_username:
new_usage[new_username + "::" + skill_part] = v
changed = True
else:
new_usage[k] = v
if changed:
atomic_write_json(str(usage_path), new_usage)
except Exception as err:
logger.warning("Failed to update skills usage keys %s -> %s: %s", old_username, new_username, err)
except Exception as e:
logger.warning("Failed to rename skills owner references %s -> %s: %s", old_username, new_username, e)
# The in-memory session cache (session_manager.sessions) stores each
# session's owner at load time. Without this patch the renamed user's
# sessions are invisible on the next /api/sessions call because
# get_sessions_for_user does an exact `s.owner == username` comparison
# against stale in-memory values.
sm = getattr(request.app.state, "session_manager", None)
if sm is not None:
for sess in list(getattr(sm, "sessions", {}).values()):
if str(getattr(sess, "owner", None) or "").strip().lower() == old_username:
sess.owner = new_username
# The owner-rename loop above updated ApiToken.owner in the DB, but the
# bearer-token cache still maps each token to the OLD owner. Without
# refreshing it, the renamed user's API tokens resolve to the old (now
@@ -348,6 +492,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
invalidator()
return {"ok": True, "username": new_username, "renamed_self": old_username == user}
@router.put("/users/{username}/admin")
async def set_user_admin(username: str, body: SetAdminRequest, request: Request):
"""Promote/demote a user to/from admin. Admin only.
The last remaining admin can't be demoted (no lockout). Self-demotion
is allowed while another admin exists; the `self` flag tells the UI to
reload the acting user into the normal-user view.
"""
user = _get_current_user(request)
if not user or not auth_manager.is_admin(user):
raise HTTPException(403, "Admin only")
result = auth_manager.set_admin(username, body.is_admin, user)
if result is SetAdminResult.USER_NOT_FOUND:
raise HTTPException(404, "User not found")
if result is SetAdminResult.NOT_AUTHORIZED:
raise HTTPException(403, "Admin only")
if result is SetAdminResult.LAST_ADMIN:
raise HTTPException(400, "Cannot demote the last admin")
target = (username or "").strip().lower()
return {
"ok": True,
"is_admin": body.is_admin,
"self": target == (user or "").strip().lower(),
}
@router.post("/signup-toggle", deprecated=True)
async def toggle_signup(request: Request):
"""
@@ -378,7 +547,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
user = _get_current_user(request)
if not user or not auth_manager.is_admin(user):
raise HTTPException(403, "Admin only")
ok = auth_manager.delete_user(body.username, user)
def _invalidate_api_token_cache():
try:
invalidator = getattr(request.app.state, "invalidate_token_cache", None)
if invalidator:
invalidator()
except Exception:
pass
try:
ok = auth_manager.delete_user(body.username, user)
except Exception:
# delete_user can touch ApiToken rows before a later auth-store write
# fails. Dirty the bearer cache anyway so a partial token purge does
# not leave already-cached tokens authenticating until restart.
_invalidate_api_token_cache()
raise
if not ok:
raise HTTPException(400, "Cannot delete user")
# delete_user removes the user's ApiToken rows, but the bearer-auth
@@ -386,12 +571,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
# rebuilds when flagged dirty. Without this, a deleted user's already
# cached token keeps authenticating until some other token op or a
# restart clears the cache. Mirror what the token routes do.
try:
invalidator = getattr(request.app.state, "invalidate_token_cache", None)
if invalidator:
invalidator()
except Exception:
pass
_invalidate_api_token_cache()
return {"ok": True}
# ---- Feature visibility (admin-managed) ----
+9 -3
View File
@@ -101,11 +101,17 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
# ── Skills ──
if "skills" in body and isinstance(body["skills"], list):
existing = skills_manager.load_all()
existing_names = {s.get("name") for s in existing if s.get("name")}
existing_ids = {s.get("id") for s in existing if s.get("id")}
# Dedup against THIS user's own skills only. Using every tenant's
# rows (load_all) meant a skill whose id/name/title matched any
# other user's was silently skipped, so the importing user lost
# their own data — same cross-tenant bug fixed for memories above.
# The full store is still saved back below.
own = [s for s in existing if s.get("owner") == user]
existing_names = {s.get("name") for s in own if s.get("name")}
existing_ids = {s.get("id") for s in own if s.get("id")}
existing_titles = {
(s.get("title") or s.get("description") or "").strip().lower()
for s in existing
for s in own
}
added = 0
for skill in body["skills"]:
+71 -53
View File
@@ -11,7 +11,7 @@ from pydantic import BaseModel
from sqlalchemy import or_, and_
from dateutil.rrule import rrulestr
from core.database import SessionLocal, CalendarCal, CalendarEvent
from core.database import SessionLocal, CalendarCal, CalendarDeletedEvent, CalendarEvent
from src.auth_helpers import require_user
from src.upload_limits import read_upload_limited, ICS_MAX_BYTES
@@ -126,6 +126,54 @@ def _resolve_base_uid(uid: str) -> str:
raise ValueError("malformed compound UID: missing base before ::")
return base
async def _push_caldav_event_after_commit(owner: str, uid: str, action: str):
"""Best-effort CalDAV write-through. Local writes stay authoritative if
the remote server is unreachable; pending flags let /sync retry later."""
try:
result = {"ok": True}
if action == "create":
from src.caldav_sync import push_event_create
result = await push_event_create(owner, uid)
elif action == "update":
from src.caldav_sync import push_event_update
result = await push_event_update(owner, uid)
elif action == "delete":
from src.caldav_sync import push_event_delete
result = await push_event_delete(owner, uid)
if result and not result.get("ok") and not result.get("skipped"):
raise RuntimeError(result.get("error") or result)
except Exception as e:
logger.warning("CalDAV %s push failed for uid=%s: %s", action, uid, e)
if action in {"create", "update"}:
db = SessionLocal()
try:
ev = _get_or_404_event(db, uid, owner)
ev.caldav_sync_pending = action
db.commit()
except Exception:
db.rollback()
finally:
db.close()
def _record_caldav_delete_tombstone(db, ev: CalendarEvent, owner: str) -> None:
if not (ev.calendar and ev.calendar.source == "caldav"):
return
tombstone = db.query(CalendarDeletedEvent).filter(
CalendarDeletedEvent.uid == ev.uid,
CalendarDeletedEvent.owner == owner,
).first()
if not tombstone:
tombstone = CalendarDeletedEvent(uid=ev.uid, owner=owner)
db.add(tombstone)
tombstone.calendar_id = ev.calendar_id
tombstone.remote_href = ev.remote_href
tombstone.remote_etag = ev.remote_etag
tombstone.caldav_base_url = getattr(ev.calendar, "caldav_base_url", None)
tombstone.summary = ev.summary or ""
tombstone.last_error = None
# ── Pydantic models ──
class EventCreate(BaseModel):
@@ -843,36 +891,35 @@ def setup_calendar_routes() -> APIRouter:
return {"ok": False, "error": str(e)[:200]}
@router.post("/sync")
async def sync_caldav_endpoint(request: Request):
"""Pull events from the configured CalDAV server into local DB.
async def sync_caldav_endpoint(request: Request, direction: str = "pull"):
"""Sync events with the configured CalDAV server.
Returns counts + any per-calendar errors. Called by the frontend
on calendar open and by the periodic scheduler loop."""
owner = _require_user(request)
from src.caldav_sync import sync_caldav
return await sync_caldav(owner)
from src.caldav_sync import sync_caldav_direction
return await sync_caldav_direction(owner, direction)
@router.delete("/calendars/{cal_id}")
async def delete_calendar(cal_id: str, request: Request):
async def delete_calendar(request: Request, cal_id: str):
owner = _require_user(request)
db = SessionLocal()
try:
cal = db.query(CalendarCal).filter(
CalendarCal.id == cal_id,
CalendarCal.owner == owner,
).first()
if not cal:
raise HTTPException(404, "Calendar not found")
cal = _get_or_404_calendar(db, cal_id, owner)
db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
db.delete(cal)
db.commit()
return {"ok": True}
except HTTPException:
raise
except Exception as e:
db.rollback()
logger.error("Failed to delete calendar %s: %s", cal_id, e)
raise HTTPException(500, "Failed to delete calendar")
finally:
db.close()
@router.get("/calendars")
async def list_calendars(request: Request):
owner = _require_user(request)
@@ -1003,19 +1050,12 @@ def setup_calendar_routes() -> APIRouter:
is_utc=_is_utc and not data.all_day,
rrule=data.rrule or "",
color=data.color or None,
caldav_sync_pending="create" if cal.source == "caldav" else None,
)
db.add(ev)
db.commit()
if cal.source == "caldav":
# Push the new event to the remote so it appears on the user's
# other devices — the sync is otherwise pull-only (#800).
from src.caldav_writeback import writeback_event
await writeback_event(owner, cal.source, cal.id, {
"uid": uid, "summary": data.summary, "description": data.description,
"location": data.location, "dtstart": dtstart, "dtend": dtend,
"all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
"rrule": data.rrule or "",
})
await _push_caldav_event_after_commit(owner, uid, "create")
return {"ok": True, "uid": uid}
except HTTPException:
raise
@@ -1061,15 +1101,12 @@ def setup_calendar_routes() -> APIRouter:
ev.rrule = data.rrule
if data.color is not None:
ev.color = data.color if data.color else None
is_caldav = ev.calendar and ev.calendar.source == "caldav"
if is_caldav:
ev.caldav_sync_pending = "update"
db.commit()
cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
if cal and cal.source == "caldav":
from src.caldav_writeback import writeback_event
await writeback_event(owner, cal.source, cal.id, {
"uid": ev.uid, "summary": ev.summary, "description": ev.description,
"location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
"all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
})
if is_caldav:
await _push_caldav_event_after_commit(owner, base_uid, "update")
return {"ok": True}
except HTTPException:
raise
@@ -1090,15 +1127,13 @@ def setup_calendar_routes() -> APIRouter:
db = SessionLocal()
try:
ev = _get_or_404_event(db, base_uid, owner)
# Capture what the remote push needs BEFORE the row is gone.
_cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
_is_caldav = bool(_cal and _cal.source == "caldav")
_cal_id, _ev_uid = ev.calendar_id, ev.uid
is_caldav = ev.calendar and ev.calendar.source == "caldav"
if is_caldav:
_record_caldav_delete_tombstone(db, ev, owner)
db.delete(ev)
db.commit()
if _is_caldav:
from src.caldav_writeback import writeback_event
await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
if is_caldav:
await _push_caldav_event_after_commit(owner, base_uid, "delete")
return {"ok": True}
except HTTPException:
raise
@@ -1152,23 +1187,6 @@ def setup_calendar_routes() -> APIRouter:
finally:
db.close()
@router.delete("/calendars/{cal_id}")
async def delete_calendar(request: Request, cal_id: str):
owner = _require_user(request)
db = SessionLocal()
try:
cal = _get_or_404_calendar(db, cal_id, owner)
db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
db.delete(cal)
db.commit()
return {"ok": True}
except HTTPException:
raise
except Exception as e:
db.rollback()
return {"error": str(e)}
finally:
db.close()
# Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole
# file into memory is unavoidable with python-icalendar, so an unbounded
+135 -10
View File
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
return
owner = getattr(sess, "owner", None)
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
if not t_model:
# If no task/utility model is configured at all, fall back to
# the session's own model so auto-naming still works even on
# minimal setups.
from src.endpoint_resolver import resolve_endpoint
_fallback = resolve_endpoint("default", owner=owner)
if _fallback and _fallback[1]:
t_url, t_model, t_headers = _fallback
else:
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
if not t_model:
logger.debug("[auto-name] No model provided, skipping")
return
@@ -497,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
return None
def _session_is_research_spinoff(sess) -> bool:
"""True if this session was created via research "Discuss" spin-off.
Detected by the primer system message the spin-off endpoint seeds into
history (metadata ``research_spinoff_from``). Such sessions are grounded
on the seeded report, so global memory + personal-doc RAG injection is
suppressed for them (the report is the sole knowledge base). Handles both
ChatMessage objects and plain dicts.
"""
for m in getattr(sess, "history", []) or []:
role = getattr(m, "role", None)
if role is None and isinstance(m, dict):
role = m.get("role")
if role != "system":
continue
md = getattr(m, "metadata", None)
if md is None and isinstance(m, dict):
md = m.get("metadata")
if (md or {}).get("research_spinoff_from"):
return True
return False
async def build_chat_context(
sess,
request,
@@ -562,9 +593,17 @@ async def build_chat_context(
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
)
# Research-spinoff ("Discuss") sessions are grounded on the seeded report:
# the primer system message IS the knowledge base. Injecting global memory
# or personal-doc RAG on every turn pulls in keyword-matched but off-topic
# facts ("wrong data") and competes with the report, so suppress both here.
is_research_spinoff = _session_is_research_spinoff(sess)
if is_research_spinoff:
mem_enabled = False
# Use RAG?
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
if incognito or not allow_tool_preprocessing:
if incognito or not allow_tool_preprocessing or is_research_spinoff:
use_rag_val = False
# If pre-fetched search context was provided (compare mode), skip live web search
@@ -587,7 +626,7 @@ async def build_chat_context(
incognito=incognito,
use_skills=skills_enabled,
)
if use_rag is not None:
if use_rag is not None or is_research_spinoff:
_preface_kwargs["use_rag"] = use_rag_val
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
@@ -615,6 +654,26 @@ async def build_chat_context(
# Build messages
messages = preface + sess.get_context_messages()
# Current date/time — injected as a standalone *user*-role context message
# placed immediately before the latest user turn, NOT folded into the
# system prompt. Its text changes every minute, and local OpenAI-compatible
# backends (llama.cpp / LM Studio) key their KV-cache prefix off the
# system message byte-for-byte; mixing ever-changing timestamp text into
# it would invalidate the cached prefix on every request (issue #2927).
# Placing it at the tail also keeps it out of the stable
# preface+history prefix, so that prefix stays byte-identical turn over
# turn (modulo the genuinely new history entries) and the cache survives.
if not agent_mode:
try:
from src.user_time import current_datetime_context_message
_dt_msg = current_datetime_context_message()
if messages and messages[-1].get("role") == "user":
messages.insert(len(messages) - 1, _dt_msg)
else:
messages.append(_dt_msg)
except Exception:
logger.debug("Failed to add current date/time context", exc_info=True)
# Auto-compact
messages, context_length, was_compacted = await maybe_compact(
sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user,
@@ -911,6 +970,54 @@ def save_assistant_response(
return None
def _is_session_stream_active(session_id: str) -> bool:
"""Best-effort check for "is a chat completion currently streaming for
this session?" — used to keep background extraction from overlapping a
main completion and competing for the local backend's processing slots
(issue #2927). Lazily imports the route module's live registry to avoid
a circular import (chat_routes imports this module at load time)."""
try:
from routes import chat_routes as _cr
return session_id in getattr(_cr, "_active_streams", {})
except Exception:
return False
async def _run_extraction_jobs_sequentially(session_id: str, jobs: list, max_wait_s: float = 120.0):
"""Run queued background-extraction coroutines one at a time, only once
no chat completion is actively streaming for this session.
As diagnosed in issue #2927, firing memory/skill extraction concurrently
with the main chat completion (or with each other) makes them compete for
the local backend's limited processing slots, evicting the main
conversation's cached KV-cache checkpoint and forcing a full prompt
re-evaluation on the next turn. Waiting for the stream to go idle and then
running the jobs strictly in sequence keeps at most one "side" request in
flight against the backend at any time, and never alongside the user's
own conversation.
"""
# Wait for the triggering turn's own stream to finish winding down (it
# almost always already has by the time this task gets scheduled — this
# is a small safety margin, not the primary mechanism).
waited = 0.0
poll = 0.25
while _is_session_stream_active(session_id) and waited < max_wait_s:
await asyncio.sleep(poll)
waited += poll
for name, job in jobs:
# Re-check before each job: a fast follow-up message from the user
# may have started a new stream for this session while we waited.
waited = 0.0
while _is_session_stream_active(session_id) and waited < max_wait_s:
await asyncio.sleep(poll)
waited += poll
try:
await job
except Exception:
logger.warning("[bg-extract] %s extraction job failed for session %s", name, session_id, exc_info=True)
def run_post_response_tasks(
sess,
session_manager,
@@ -933,7 +1040,22 @@ def run_post_response_tasks(
extract_skills: bool = True,
allow_background_extraction: bool = True,
):
"""Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction."""
"""Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.
Memory/skill extraction are queued to run *sequentially*, after the main
completion stream for this session has fully wound down never
concurrently with it or with each other. As diagnosed in issue #2927,
firing these "side" LLM calls in parallel with the main chat completion
makes them compete for the local backend's limited processing slots
(llama.cpp defaults to 4), evicting the main conversation's cached
checkpoint and forcing a full prompt re-evaluation on the next turn. By
the time this function runs the main response is already saved, but the
extraction calls themselves are still async queuing them through
``_queue_background_extraction`` keeps them from overlapping the *next*
turn's request too.
"""
_extraction_jobs: list = []
# Memory extraction — only every 4th message pair to avoid excess LLM calls
_msg_count = len(sess.history) if hasattr(sess, 'history') else 0
_should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0)
@@ -943,10 +1065,10 @@ def run_post_response_tasks(
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
asyncio.create_task(extract_and_store(
_extraction_jobs.append(("memory", extract_and_store(
sess, memory_manager, memory_vector,
t_url, t_model, t_headers,
))
)))
# Skill extraction from complex agent runs. Only when the user actually
# chose agent mode — not a chat we auto-escalated for a notes/calendar
@@ -982,12 +1104,15 @@ def run_post_response_tasks(
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
asyncio.create_task(maybe_extract_skill(
_extraction_jobs.append(("skill", maybe_extract_skill(
sess, skills_manager,
s_url, s_model, s_headers,
agent_rounds, agent_tool_calls,
owner=owner,
))
)))
if _extraction_jobs:
asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
# Token accumulation
if last_metrics:
+58 -7
View File
@@ -62,6 +62,33 @@ def _stream_set(session_id: str, **fields) -> None:
rec.update(fields)
def _resolve_request_workspace(request, raw_value) -> tuple:
"""Resolve the posted workspace for this request: (workspace, rejected).
Privilege is checked BEFORE the path ever touches the filesystem. Only
admin/single-user callers can use the workspace-backed file/shell tools,
so only they get vet_workspace() and the workspace_rejected signal. For
any other caller the submitted value is dropped uniformly, with no vetting
and no event: otherwise the presence/absence of workspace_rejected would
let a non-admin chat caller probe which host paths exist.
vet_workspace rejects non-directories, sensitive roots (.ssh, .gnupg,
...), and filesystem roots; on rejection there is no confinement and the
default tool-path allowlist applies. The rejected value is surfaced so the
stream can tell an admin client (which believes a workspace is active)
that it was dropped.
"""
requested = (raw_value or "").strip()
if not requested:
return "", ""
from src.tool_security import owner_is_admin_or_single_user
if not owner_is_admin_or_single_user(get_current_user(request)):
return "", ""
from src.tool_execution import vet_workspace
workspace = vet_workspace(requested) or ""
return workspace, (requested if not workspace else "")
def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
if not session_url or not endpoint_base:
return False
@@ -400,6 +427,7 @@ def setup_chat_routes(
temperature=ctx.preset.temperature,
max_tokens=ctx.preset.max_tokens,
prompt_type=preset_id,
session_id=session,
)
_clean_reply, _clean_md = clean_thinking_for_save(reply, {"model": sess.model})
sess.add_message(ChatMessage("assistant", _clean_reply, metadata=_clean_md))
@@ -446,8 +474,11 @@ def setup_chat_routes(
use_research = form_data.get("use_research")
time_filter = form_data.get("time_filter")
preset_id = form_data.get("preset_id")
allow_bash = form_data.get("allow_bash")
allow_web_search = form_data.get("allow_web_search")
# Issue #3229: API callers send JSON, not FormData. Read from the
# JSON body as fallback so callers who send {"allow_bash": true}
# actually get bash enabled.
allow_bash = form_data.get("allow_bash") or (body or {}).get("allow_bash")
allow_web_search = form_data.get("allow_web_search") or (body or {}).get("allow_web_search")
use_rag = form_data.get("use_rag")
search_context = form_data.get("search_context") # pre-fetched web search results (compare mode)
compare_mode = str(form_data.get("compare_mode", "")).lower() == "true"
@@ -456,7 +487,10 @@ def setup_chat_routes(
# manual form posts that still send plan_mode=true.
plan_mode = False
chat_mode = str(form_data.get("mode", "")).lower() # 'chat' or 'agent'
workspace = ""
# Workspace: confine the agent's file/shell tools to this folder.
workspace, workspace_rejected = _resolve_request_workspace(
request, form_data.get("workspace")
)
# Plan mode is a modifier on agent mode — it only makes sense with tools.
if plan_mode:
chat_mode = "agent"
@@ -707,7 +741,7 @@ def setup_chat_routes(
# leak a doc that belongs to a DIFFERENT session.
if not active_doc:
try:
from src.tool_implementations import get_active_document
from src.agent_tools.document_tools import get_active_document
_mem_id = get_active_document()
if _mem_id:
_mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
@@ -728,9 +762,18 @@ def setup_chat_routes(
# Build disabled-tools set from frontend toggles + user privileges
disabled_tools = set()
if str(allow_bash).lower() != "true":
# Only disable bash/web_search when the caller *explicitly* set them
# to a falsy value. When unset (None), defer to per-user privilege
# checks below — this lets admins with can_use_bash=True use bash
# by default without having to send allow_bash in every request.
if allow_bash is not None and str(allow_bash).lower() != "true":
disabled_tools.add("bash")
if str(allow_web_search).lower() != "true":
_explicit_web_intent = bool(_tool_intent and _tool_intent.category == "web")
if (
allow_web_search is not None
and str(allow_web_search).lower() != "true"
and not _explicit_web_intent
):
disabled_tools.add("web_search")
disabled_tools.add("web_fetch")
@@ -848,6 +891,13 @@ def setup_chat_routes(
# Register active stream for partial-save safety net
_active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode}
# The client sent a workspace the server refused to bind (deleted
# folder, file path, sensitive dir, filesystem root). Tell it up
# front so the UI can clear the pill instead of displaying a
# confinement that is not actually in effect.
if workspace_rejected:
yield f"data: {json.dumps({'type': 'workspace_rejected', 'data': {'path': workspace_rejected}})}\n\n"
if ctx.preprocessed.attachment_meta:
yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n"
@@ -1076,6 +1126,7 @@ def setup_chat_routes(
max_tokens=ctx.preset.max_tokens,
prompt_type=preset_id,
tools=None,
session_id=session,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
@@ -1223,9 +1274,9 @@ def setup_chat_routes(
tool_policy=tool_policy,
owner=_user,
fallbacks=_fallback_candidates,
workspace=None,
plan_mode=plan_mode,
approved_plan=approved_plan or None,
workspace=workspace or None,
):
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
try:
+18 -6
View File
@@ -18,6 +18,7 @@ from fastapi.responses import StreamingResponse
from src.auth_helpers import require_authenticated_request, require_user
from src.tool_implementations import do_manage_notes
from src.constants import COOKBOOK_STATE_FILE
from routes._validators import validate_remote_host, validate_ssh_port
COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
@@ -36,6 +37,21 @@ DOCS_WRITE_SCOPES = {"documents:write"}
WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"}
def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
"""Resolve a cookbook task's stored SSH target into ``(host, port_flag)``.
``host`` is ``""`` for a local task. ``remoteHost`` / ``sshPort`` come from
cookbook_state.json and get interpolated into an ``ssh`` command string, so
validate them the same way the cookbook routes do. A tampered entry with
shell metacharacters in ``remoteHost`` is rejected with 400 rather than
injected.
"""
host = validate_remote_host((task.get("remoteHost") or "").strip() or None) or ""
ssh_port = validate_ssh_port((task.get("sshPort") or "").strip() or None) or ""
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
return host, port_flag
async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
"""Run an existing route handler with request.state.current_user temporarily
set to ``owner`` so its internal get_current_user/require_user calls see
@@ -550,8 +566,7 @@ def setup_codex_routes(
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
if task is None:
raise HTTPException(404, "task not found")
host = (task.get("remoteHost") or "").strip()
ssh_port = (task.get("sshPort") or "").strip()
host, port_flag = _ssh_prefix_for_task(task)
# Prefer the persisted log file over the tmux pane. The pane gets
# overwritten by the post-crash neofetch banner + bash prompt the
# moment vllm exits; the log file is the raw stdout/stderr and
@@ -563,7 +578,6 @@ def setup_codex_routes(
f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
)
if host:
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
import shlex
cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
else:
@@ -625,10 +639,8 @@ def setup_codex_routes(
state = _read_cookbook_state()
tasks = state.get("tasks") or []
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
host = ((task or {}).get("remoteHost") or "").strip()
ssh_port = ((task or {}).get("sshPort") or "").strip()
host, port_flag = _ssh_prefix_for_task(task or {})
if host:
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
else:
cmd = f"tmux kill-session -t {session_id}"
+15 -4
View File
@@ -45,10 +45,14 @@ def _save_settings(settings):
def _get_carddav_config():
import os
settings = _load_settings()
password = settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", ""))
if password and "carddav_password" in settings:
from src.secret_storage import decrypt
password = decrypt(password)
return {
"url": settings.get("carddav_url", os.environ.get("CARDDAV_URL", "")),
"username": settings.get("carddav_username", os.environ.get("CARDDAV_USERNAME", "")),
"password": settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", "")),
"password": password,
}
@@ -769,8 +773,11 @@ def setup_contacts_routes():
@router.post("/import")
async def import_vcf(data: dict, _admin: str = Depends(require_admin)):
"""Import contacts from .vcf or CSV. Body: {"vcf": "..."} or {"csv": "..."}."""
text = data.get("vcf") or data.get("text") or ""
csv_text = data.get("csv") or ""
# Coerce defensively: a non-string vcf/text/csv (e.g. a number or list
# in the JSON body) would otherwise reach .strip() and 500 with an
# AttributeError instead of degrading to a clean "no data" response.
text = str(data.get("vcf") or data.get("text") or "")
csv_text = str(data.get("csv") or "")
if text.strip():
if "BEGIN:VCARD" not in text.upper():
return {"success": False, "error": "No vCard data found"}
@@ -822,7 +829,11 @@ def setup_contacts_routes():
except ValueError as e:
raise HTTPException(400, str(e))
else:
settings[key] = data[key]
value = data[key]
if key == "carddav_password" and value:
from src.secret_storage import encrypt
value = encrypt(value)
settings[key] = value
_save_settings(settings)
# Force re-fetch
_contact_cache["fetched_at"] = None
+108 -30
View File
@@ -1,16 +1,19 @@
"""cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""
import json
import logging
import ntpath
import os
import posixpath
import re
import shlex
from pathlib import Path
from fastapi import HTTPException
from pydantic import BaseModel
from routes._validators import validate_remote_host, validate_ssh_port
from core.platform_compat import _ssh_exec_argv
logger = logging.getLogger(__name__)
@@ -30,21 +33,24 @@ _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
# Include pattern is a glob: allow typical safe glyphs only.
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
# Remote host: either `user@host` or plain `host` (alias is allowed), where host
# is a safe DNS-like token or a short SSH config alias.
_REMOTE_HOST_RE = re.compile(r"^(?:[A-Za-z0-9._-]+@)?[A-Za-z0-9._-]+$")
# HF tokens and API tokens are url-safe base64-like.
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
# Anything beyond plain alphanumerics + dash + underscore could break out
# of the shell/PowerShell contexts the value lands in.
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
# A download target directory. Absolute or ~-relative path; safe path glyphs
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
# command. A leading ~ is expanded to $HOME at command-build time.
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
# only (no quotes or shell metacharacters). Spaces are allowed because command
# builders pass the value through quoted shell/Python contexts. The character
# class uses ``\w`` — Unicode word characters under Python 3's default str
# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
@@ -78,14 +84,6 @@ def _validate_include(v: str | None) -> str | None:
return v
def _validate_remote_host(v: str | None) -> str | None:
if v is None or v == "":
return None
if not _REMOTE_HOST_RE.match(v):
raise HTTPException(400, "Invalid remote_host — must be host or user@host, no SSH option syntax")
return v
def _validate_token(v: str | None) -> str | None:
if v is None or v == "":
return None
@@ -94,26 +92,43 @@ def _validate_token(v: str | None) -> str | None:
return v
def load_stored_hf_token(*, state_path: Path | str | None = None) -> str:
"""Return the decrypted HF token from cookbook_state.json, else env fallback."""
path = Path(state_path) if state_path else Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
token = ""
if path.exists():
try:
state = json.loads(path.read_text(encoding="utf-8"))
env = state.get("env") if isinstance(state, dict) else {}
if isinstance(env, dict) and env.get("hfToken"):
from src.secret_storage import decrypt
token = decrypt(env.get("hfToken") or "")
except Exception:
token = ""
if not token:
token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or "").strip()
return token
def _validate_local_dir(v: str | None) -> str | None:
if v is None or v == "":
return None
if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
v = v[1:-1]
v = v.rstrip("/") or "/"
if not _LOCAL_DIR_RE.match(v):
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
if not (_LOCAL_DIR_RE.match(v) or _WINDOWS_LOCAL_DIR_RE.match(v)):
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
# Reject path segments that start with '-' (option injection). '-' is in the
# allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
# as a CLI flag by hf/etc. — and quoting does NOT stop a value from being
# parsed as an option. This is the one residual that command-build-time
# quoting can't cover, so the guard lives here, keeping the safety wholly
# inside the validator rather than relying on consumers.
if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
return v
def _validate_ssh_port(v: str | None) -> str | None:
if v is None or v == "":
return None
if not _SSH_PORT_RE.fullmatch(str(v)):
raise HTTPException(400, "Invalid ssh_port")
port = int(v)
if port < 1 or port > 65535:
raise HTTPException(400, "Invalid ssh_port")
return str(port)
def _validate_gpus(v: str | None) -> str | None:
if v is None or v == "":
return None
@@ -125,7 +140,7 @@ def _validate_gpus(v: str | None) -> str | None:
def _shell_path(p: str) -> str:
"""Render a validated path for a double-quoted shell context, expanding a
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
_validate_local_dir already restricts the charset."""
_validate_local_dir already rejects quotes and shell metacharacters."""
if p == "~":
return '"$HOME"'
if p.startswith("~/"):
@@ -347,7 +362,12 @@ def _user_shell_path_bootstrap() -> list[str]:
' ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"',
' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
'fi',
'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
# Windows can expose python3 as a Microsoft Store App Execution Alias
# under WindowsApps. Git Bash sees that stub as present, but it exits
# before running Python. A Windows venv usually has python.exe, not
# python3.exe, so treat a missing or WindowsApps python3 as absent.
'_odys_py3="$(command -v python3 2>/dev/null || true)"',
'case "$_odys_py3" in ""|*[Ww]indows[Aa]pps*) python3() { python "$@"; } ;; esac',
'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
]
@@ -386,6 +406,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
" for root, dirs, fns in safe_walk(base):",
" for fn in sorted(fns):",
" if not fn.lower().endswith('.gguf'): continue",
" if fn.startswith('._'): continue # macOS AppleDouble sidecar, not a real GGUF",
" fp = os.path.join(root, fn)",
" try: size = os.path.getsize(fp)",
" except Exception: size = 0",
@@ -557,6 +578,36 @@ _GGUF_PRELUDE_RE = re.compile(
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
_LLAMA_CPP_PYTHON_GGML_TYPES = {
"f32": "0",
"f16": "1",
"q4_0": "2",
"q4_1": "3",
"q5_0": "6",
"q5_1": "7",
"q8_0": "8",
"q8_1": "9",
"q2_k": "10",
"q3_k": "11",
"q4_k": "12",
"q5_k": "13",
"q6_k": "14",
"q8_k": "15",
"iq2_xxs": "16",
"iq2_xs": "17",
"iq3_xxs": "18",
"iq1_s": "19",
"iq4_nl": "20",
"iq3_s": "21",
"iq2_s": "22",
"iq4_xs": "23",
"mxfp4": "39",
"nvfp4": "40",
"q1_0": "41",
}
_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile(
r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)"
)
def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
@@ -588,6 +639,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -
return f"[{host}]" if bracketed_host else host, port
def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None:
"""Map llama.cpp KV cache type names to llama-cpp-python's integer enum."""
if not cmd or "llama_cpp.server" not in cmd:
return cmd
def repl(match: re.Match[str]) -> str:
value = match.group("value")
mapped = _LLAMA_CPP_PYTHON_GGML_TYPES.get(value.lower())
if not mapped:
return match.group(0)
quote = match.group("quote")
return f"{match.group('flag')}{match.group('sep')}{quote}{mapped}{quote}"
return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(repl, cmd)
def _check_serve_binary(seg: str) -> None:
"""Validate that a single command segment starts with an allowlisted binary
(after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
@@ -726,6 +793,7 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' done')
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
runner_lines.append(' mkdir -p ~/bin')
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
@@ -1030,6 +1098,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
"vLLM is not installed or not in PATH on this server.",
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
),
(
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
r"Please ensure sgl_kernel is properly installed",
"SGLang native dependencies are missing on this server.",
[
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
],
),
(
r"sglang.*command not found|No module named sglang|SGLang is not installed",
"SGLang is not installed or not in PATH on this server.",
+75
View File
@@ -0,0 +1,75 @@
"""Pure helpers for shaping cookbook task output for the status response.
Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
unit-tested without standing up the whole app.
"""
import re
_FETCHING_ZERO_FILES_RE = re.compile(r"Fetching\s+0\s+files", re.IGNORECASE)
# Probe scripts for the dead-session download check, run as
# `python3 -c <PROBE> <repo_id> <cache_root>` (locally or over SSH).
# cache_root is the task's custom download dir, '' for the default HF cache.
# It has to be passed explicitly: the download runner exports
# HF_HOME=<local_dir>, so that task's cache lives under <local_dir>/hub, and
# the probe process's own environment knows nothing about it.
HF_CACHE_COMPLETE_PROBE = (
"import os,sys;"
"repo=sys.argv[1];"
"root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
"base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
"snap=os.path.join(d,'snapshots');"
"ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
"inc=False;"
"blobs=os.path.join(d,'blobs');"
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
"sys.exit(0 if ok and not inc else 1)"
)
HF_CACHE_INCOMPLETE_PROBE = (
"import os,sys;"
"repo=sys.argv[1];"
"root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
"base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
"blobs=os.path.join(d,'blobs');"
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
"sys.exit(0 if inc else 1)"
)
def classify_dead_download(full_snapshot: str):
"""Resolve a dead download session's status from its runner markers.
The runner prints DOWNLOAD_OK only after exiting 0 (and DOWNLOAD_FAILED
otherwise), so the markers stay trustworthy after the tmux pane is gone.
Returns (status, zero_files), or None when the snapshot carries no marker
and the caller has to fall back to the cache probe. Same precedence as
the live-session branch: DOWNLOAD_OK wins, except a "Fetching 0 files"
run is an error (nothing matched the include/quant pattern).
"""
if not full_snapshot:
return None
if "DOWNLOAD_OK" in full_snapshot:
if _FETCHING_ZERO_FILES_RE.search(full_snapshot):
return ("error", True)
return ("completed", False)
if "DOWNLOAD_FAILED" in full_snapshot:
return ("error", False)
return None
def error_aware_output_tail(full_snapshot: str, status: str) -> str:
"""Return the trailing slice of a task log for the status response.
Failed tasks return the last 50 lines so the "Copy last 50 lines" action
surfaces the actual error context (stack traces, build output). Running and
other non-error tasks keep the cheaper 12-line tail to limit the payload on
the 10s polling interval.
"""
if not full_snapshot:
return ""
tail_lines = 50 if status == "error" else 12
return "\n".join(full_snapshot.splitlines()[-tail_lines:])
+94 -73
View File
@@ -15,9 +15,11 @@ from pathlib import Path
from fastapi import APIRouter, HTTPException, Request, Depends
from src.auth_helpers import require_user
from src.constants import COOKBOOK_STATE_FILE
from pydantic import BaseModel
from core.middleware import require_admin
from routes._validators import validate_remote_host, validate_ssh_port
from core.platform_compat import (
IS_WINDOWS,
detached_popen_kwargs,
@@ -28,18 +30,26 @@ from core.platform_compat import (
which_tool,
)
from routes.shell_routes import TMUX_LOG_DIR
from routes.cookbook_output import (
error_aware_output_tail, classify_dead_download,
HF_CACHE_COMPLETE_PROBE, HF_CACHE_INCOMPLETE_PROBE,
)
logger = logging.getLogger(__name__)
from routes.cookbook_helpers import (
_SSH_PORT_RE, _REMOTE_HOST_RE, _SESSION_ID_RE,
_validate_repo_id, _validate_serve_model_id, _validate_include, _validate_remote_host, _validate_token,
_validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path,
_SESSION_ID_RE, _validate_repo_id, _validate_serve_model_id, _validate_include, _validate_token,
_validate_local_dir, _validate_gpus, _shell_path,
_ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
_safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
load_stored_hf_token,
_append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
_pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
_diagnose_serve_output, run_ssh_command_async,
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
_normalize_llama_cpp_python_cache_types,
ModelDownloadRequest, ServeRequest,
)
@@ -48,13 +58,13 @@ _HF_TOKEN_STATUS_SNIPPET = (
'echo "[odysseus] HF token: applied"; '
'else '
'echo "[odysseus] HF token: NOT SET — gated/private models will be denied. '
'Add one in Odysseus Settings -> Cookbook -> HuggingFace Token."; '
'Add one in Odysseus Cookbook -> Settings -> HuggingFace Token."; '
'fi'
)
def setup_cookbook_routes() -> APIRouter:
router = APIRouter(tags=["cookbook"])
_cookbook_state_path = Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
_cookbook_state_path = Path(COOKBOOK_STATE_FILE)
def _mask_secret(value: str) -> str:
if not value:
@@ -164,6 +174,16 @@ def setup_cookbook_routes() -> APIRouter:
"vLLM is not installed or not in PATH on this server.",
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
),
(
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
r"Please ensure sgl_kernel is properly installed",
"SGLang native dependencies are missing on this server.",
[
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
],
),
(
r"sglang.*command not found|No module named sglang|SGLang is not installed",
"SGLang is not installed or not in PATH on this server.",
@@ -232,14 +252,7 @@ def setup_cookbook_routes() -> APIRouter:
return state
def _load_stored_hf_token() -> str:
if not _cookbook_state_path.exists():
return ""
try:
state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
env = state.get("env") if isinstance(state, dict) else {}
return _decrypt_secret(env.get("hfToken") if isinstance(env, dict) else "")
except Exception:
return ""
return load_stored_hf_token(state_path=_cookbook_state_path)
def _cookbook_ssh_dir() -> Path:
# The Docker image keeps cookbook keys under /app/.ssh; that path only
@@ -354,7 +367,11 @@ def setup_cookbook_routes() -> APIRouter:
# all output to the log the poller reads. Paths handed to bash use
# POSIX form + shell-quoting so drive paths / spaces survive.
inner = TMUX_LOG_DIR / f"{session_id}_run.sh"
inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8")
pp = shlex.quote(pid_path.as_posix())
inner.write_text(
f"printf '%s\\n' \"$$\" > {pp}\n" + "\n".join(bash_lines) + "\n",
encoding="utf-8",
)
lp = shlex.quote(log_path.as_posix())
ip = shlex.quote(inner.as_posix())
script_path = TMUX_LOG_DIR / f"{session_id}.sh"
@@ -406,8 +423,8 @@ def setup_cookbook_routes() -> APIRouter:
else:
_validate_repo_id(req.repo_id)
_validate_include(req.include)
_validate_remote_host(req.remote_host)
req.ssh_port = _validate_ssh_port(req.ssh_port)
validate_remote_host(req.remote_host)
req.ssh_port = validate_ssh_port(req.ssh_port)
req.local_dir = _validate_local_dir(req.local_dir)
req.hf_token = "" if is_ollama_download else (req.hf_token or _load_stored_hf_token())
_validate_token(req.hf_token)
@@ -738,9 +755,8 @@ def setup_cookbook_routes() -> APIRouter:
# Validate shell-bound inputs, matching the sibling list_gpus endpoint —
# `host`/`ssh_port` are interpolated into an ssh command below, so an
# unvalidated value (e.g. "x'; rm -rf ~ #") would be command injection.
host = _validate_remote_host(host)
if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
raise HTTPException(400, "Invalid ssh_port")
host = validate_remote_host(host)
ssh_port = validate_ssh_port(ssh_port)
TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)
model_dirs = []
@@ -889,11 +905,16 @@ def setup_cookbook_routes() -> APIRouter:
# listening" check without requiring ss/netstat/nmap.
ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
if ssh_port and str(ssh_port) != "22":
if not _SSH_PORT_RE.match(str(ssh_port)):
try:
ssh_port = validate_ssh_port(ssh_port)
except HTTPException:
return None
ssh_base.extend(["-p", str(ssh_port)])
host_arg = remote
if not _REMOTE_HOST_RE.match(host_arg):
try:
host_arg = validate_remote_host(remote)
except HTTPException:
return None
if not host_arg:
return None
probe_ports = " ".join(str(start_port + i) for i in range(max_offset + 1))
script = (
@@ -1196,8 +1217,8 @@ def setup_cookbook_routes() -> APIRouter:
"""
require_admin(request)
# Defence-in-depth: reject values that could break out of shell contexts.
_validate_remote_host(req.remote_host)
req.ssh_port = _validate_ssh_port(req.ssh_port)
validate_remote_host(req.remote_host)
req.ssh_port = validate_ssh_port(req.ssh_port)
req.gpus = _validate_gpus(req.gpus)
req.hf_token = req.hf_token or _load_stored_hf_token()
_validate_token(req.hf_token)
@@ -1208,6 +1229,7 @@ def setup_cookbook_routes() -> APIRouter:
# many downstream `"engine" in req.cmd` membership checks can't hit
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd,
local=not bool(req.remote_host),
@@ -1637,12 +1659,11 @@ def setup_cookbook_routes() -> APIRouter:
async def server_setup(request: Request, req: SetupRequest):
"""Install required dependencies on a remote server via SSH."""
require_admin(request)
host = _validate_remote_host(req.host)
host = validate_remote_host(req.host)
if not host:
raise HTTPException(400, "host is required")
port = req.ssh_port
if port is not None and port != "" and not re.fullmatch(r"\d{1,5}", port):
raise HTTPException(400, "Invalid ssh_port")
port = validate_ssh_port(port)
pf = f"-p {port} " if port and port != "22" else ""
# Detect platform: Windows first (echo %OS% → Windows_NT), then Termux, then Linux
@@ -1886,9 +1907,8 @@ def setup_cookbook_routes() -> APIRouter:
`busy` is True when free_mb/total_mb < 0.5.
"""
require_admin(request)
host = _validate_remote_host(host)
if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
raise HTTPException(400, "Invalid ssh_port")
host = validate_remote_host(host)
ssh_port = validate_ssh_port(ssh_port)
gpu_query = "nvidia-smi --query-gpu=index,name,memory.free,memory.total,memory.used,utilization.gpu,uuid --format=csv,noheader,nounits"
nvidia_error = None
try:
@@ -2045,9 +2065,8 @@ def setup_cookbook_routes() -> APIRouter:
sig = (req.signal or "TERM").upper()
if sig not in ("TERM", "KILL", "INT"):
raise HTTPException(400, "signal must be TERM, KILL, or INT")
host = _validate_remote_host(req.host)
if req.ssh_port and not _SSH_PORT_RE.fullmatch(req.ssh_port):
raise HTTPException(400, "Invalid ssh_port")
host = validate_remote_host(req.host)
req.ssh_port = validate_ssh_port(req.ssh_port)
kill_cmd = f"kill -{sig} {req.pid}"
try:
if host:
@@ -2381,14 +2400,19 @@ def setup_cookbook_routes() -> APIRouter:
host = (srv.get("host") or "").strip()
if not host:
continue # local-only entry; the /proc scan handles it
if not _REMOTE_HOST_RE.match(host):
try:
host = validate_remote_host(host)
except HTTPException:
continue
sport = str(srv.get("port") or "").strip()
ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
if sport and sport != "22":
if not _SSH_PORT_RE.match(sport):
try:
sport = validate_ssh_port(sport)
except HTTPException:
continue
ssh_base.extend(["-p", sport])
if sport != "22":
ssh_base.extend(["-p", sport])
try:
ls = subprocess.run(
@@ -2802,30 +2826,20 @@ def setup_cookbook_routes() -> APIRouter:
def _cookbook_tasks_status_sync():
import subprocess
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
"""Best-effort check for a completed HF cache entry.
tmux output can stop at a stale progress line if the pane/session
disappears before Cookbook captures the final DOWNLOAD_OK marker.
In that case, trust the cache shape: a snapshot directory with files
and no *.incomplete blobs means HuggingFace finished materializing the
model.
model. cache_root is the task's custom download dir — the runner
pointed HF_HOME there, so the cache lives under <cache_root>/hub,
not wherever this probe's environment says.
"""
if not repo_id or "/" not in repo_id:
return False
py = (
"import os,sys;"
"repo=sys.argv[1];"
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
"snap=os.path.join(d,'snapshots');"
"ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
"inc=False;"
"blobs=os.path.join(d,'blobs');"
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
"sys.exit(0 if ok and not inc else 1)"
)
cmd = ["python3", "-c", py, repo_id]
cmd = ["python3", "-c", HF_CACHE_COMPLETE_PROBE, repo_id, cache_root or ""]
try:
if remote_host:
ssh_base = ["ssh"]
@@ -2839,7 +2853,7 @@ def setup_cookbook_routes() -> APIRouter:
except Exception:
return False
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
"""Best-effort check for resumable HF partial blobs.
A lost SSH/tmux session can leave a real download still incomplete.
@@ -2848,16 +2862,7 @@ def setup_cookbook_routes() -> APIRouter:
"""
if not repo_id or "/" not in repo_id:
return False
py = (
"import os,sys;"
"repo=sys.argv[1];"
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
"blobs=os.path.join(d,'blobs');"
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
"sys.exit(0 if inc else 1)"
)
cmd = ["python3", "-c", py, repo_id]
cmd = ["python3", "-c", HF_CACHE_INCOMPLETE_PROBE, repo_id, cache_root or ""]
try:
if remote_host:
ssh_base = ["ssh"]
@@ -2929,12 +2934,18 @@ def setup_cookbook_routes() -> APIRouter:
if not _SESSION_ID_RE.match(session_id):
logger.warning(f"Skipping task with unsafe session_id: {session_id!r}")
continue
if remote and not _REMOTE_HOST_RE.match(remote):
logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
continue
if _tport and not _SSH_PORT_RE.match(str(_tport)):
logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
continue
if remote:
try:
remote = validate_remote_host(remote)
except HTTPException:
logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
continue
if _tport:
try:
_tport = validate_ssh_port(str(_tport))
except HTTPException:
logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
continue
if task_platform == "windows" and remote:
# Windows: check PID file + Get-Process, read log tail
sd = "$env:TEMP\\odysseus-sessions"
@@ -3047,6 +3058,7 @@ def setup_cookbook_routes() -> APIRouter:
# snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
# when the PID is gone instead of blindly reporting "stopped".
download_zero_files = False
exit_code = None
status = "unknown"
download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
@@ -3055,7 +3067,7 @@ def setup_cookbook_routes() -> APIRouter:
and (
".incomplete" in full_snapshot
or bool(re.search(r'model-\d+-of-\d+\.[A-Za-z0-9_.-]+:\s+(?:[0-9]|[1-8][0-9])%', full_snapshot))
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""))
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
)
)
if is_alive or (local_win_task and full_snapshot):
@@ -3096,11 +3108,19 @@ def setup_cookbook_routes() -> APIRouter:
else:
status = "running"
else:
# Session is dead — check if it completed or crashed
if (
# Session is dead — check if it completed or crashed. The
# runner markers in the retained output are conclusive
# (DOWNLOAD_OK only prints after exit 0), so check them before
# the cache probe, which can't see ollama pulls at all.
marker = classify_dead_download(full_snapshot) if task_type == "download" else None
if marker is not None:
status, download_zero_files = marker
if status == "completed" and not progress_text:
progress_text = "Download complete"
elif (
task_type == "download"
and not download_has_incomplete_evidence
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""))
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
):
status = "completed"
if not progress_text:
@@ -3120,7 +3140,7 @@ def setup_cookbook_routes() -> APIRouter:
status = "error"
if download_zero_files:
diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
output_tail = error_aware_output_tail(full_snapshot, status)
results.append({
"session_id": session_id,
@@ -3131,6 +3151,7 @@ def setup_cookbook_routes() -> APIRouter:
"phase": serve_phase,
"diagnosis": diagnosis,
"output_tail": output_tail,
"exit_code": exit_code,
"cmd": _payload.get("_cmd") or "",
"tps": phase_info.get("tps"),
"reqs": phase_info.get("reqs"),
+35 -1
View File
@@ -1,12 +1,13 @@
"""Diagnostics routes — /api/db/stats, /api/rag/stats, /api/test/youtube, /api/test-research."""
import logging
import os
from typing import Dict, Any
from fastapi import APIRouter, HTTPException, Form, Request
from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
from core.constants import DEFAULT_HOST
from core.constants import DEFAULT_HOST, DATA_DIR
from core.middleware import require_admin
logger = logging.getLogger(__name__)
@@ -16,9 +17,42 @@ def setup_diagnostics_routes(
rag_manager,
rag_available: bool,
research_handler,
memory_vector=None,
) -> APIRouter:
router = APIRouter(tags=["diagnostics"])
@router.get("/api/diagnostics/services")
async def get_service_health(request: Request) -> Dict[str, Any]:
"""Consolidated degraded-state report for ChromaDB, SearXNG, email,
ntfy, and provider endpoints. Non-intrusive probes safe to poll."""
require_admin(request)
from src.service_health import collect_service_health
return await collect_service_health(rag_manager, memory_vector)
@router.get("/api/diagnostics/logs")
async def get_diagnostics_logs(request: Request, limit: int = 200) -> Dict[str, Any]:
require_admin(request)
limit = max(1, min(limit, 1000))
try:
log_file = os.path.join(DATA_DIR, "logs", "app.log")
if not os.path.exists(log_file):
return {"status": "success", "logs": []}
# Safe tail read of the log file (max 5MB via rotation)
with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
lines = f.readlines()
tail_lines = lines[-limit:] if len(lines) > limit else lines
tail_lines = [line.rstrip('\r\n') for line in tail_lines]
return {
"status": "success",
"logs": tail_lines
}
except Exception as e:
logger.error(f"Diagnostics logs retrieval error: {e}")
raise HTTPException(500, f"Failed to retrieve logs: {str(e)}")
@router.get("/api/db/stats")
async def get_database_stats(request: Request) -> Dict[str, Any]:
require_admin(request)
+4 -4
View File
@@ -108,10 +108,10 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
# to markdown for prose.
language = req.language
if not language:
from src.tool_implementations import _looks_like_email_document, _sniff_doc_language
from src.agent_tools.document_tools import _looks_like_email_document, _sniff_doc_language
language = _sniff_doc_language(req.content)
else:
from src.tool_implementations import _looks_like_email_document
from src.agent_tools.document_tools import _looks_like_email_document
if _looks_like_email_document(req.content, req.title):
language = "email"
@@ -643,7 +643,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
# in-memory active-doc pointer so the last-resort injection
# path doesn't re-surface this doc in a later chat (#1160).
try:
from src.tool_implementations import clear_active_document
from src.agent_tools.document_tools import clear_active_document
clear_active_document(doc_id)
except Exception:
pass
@@ -672,7 +672,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
# Closed/deleted — drop the in-memory active-doc pointer so it isn't
# re-injected into a later, unrelated chat (#1160).
try:
from src.tool_implementations import clear_active_document
from src.agent_tools.document_tools import clear_active_document
clear_active_document(doc_id)
except Exception:
pass
+60 -16
View File
@@ -304,6 +304,7 @@ OWNER_SCOPED_EMAIL_CACHE_TABLES = {
"email_ai_replies",
"email_calendar_extractions",
"email_urgency_alerts",
"sender_signatures",
}
@@ -341,6 +342,55 @@ def _ensure_owner_scoped_email_cache_table(conn, table: str, create_sql: str, co
_lg.getLogger(__name__).warning(f"{table} owner-migration skipped: {_mig_e}")
def _ensure_sender_signatures_table(conn):
"""Create/migrate learned sender signatures to an owner-scoped cache."""
create_sql = """
CREATE TABLE IF NOT EXISTS sender_signatures (
from_address TEXT,
owner TEXT DEFAULT '',
signature_text TEXT,
sample_count INTEGER,
last_built_at TEXT NOT NULL,
model_used TEXT,
source TEXT,
PRIMARY KEY (from_address, owner)
)
"""
conn.execute(create_sql)
try:
info = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
cols = [r[1] for r in info]
pk_cols = [r[1] for r in sorted((r for r in info if r[5]), key=lambda r: r[5])]
if "owner" in cols and pk_cols == ["from_address", "owner"]:
return
conn.execute("ALTER TABLE sender_signatures RENAME TO sender_signatures__old")
conn.execute(create_sql)
old_cols = [r[1] for r in conn.execute("PRAGMA table_info(sender_signatures__old)").fetchall()]
copy_cols = [
c for c in (
"from_address",
"signature_text",
"sample_count",
"last_built_at",
"model_used",
"source",
)
if c in old_cols
]
source_owner = "COALESCE(owner, '')" if "owner" in old_cols else "''"
conn.execute(
f"INSERT OR IGNORE INTO sender_signatures "
f"({', '.join([*copy_cols, 'owner'])}) "
f"SELECT {', '.join([*copy_cols, source_owner])} "
f"FROM sender_signatures__old"
)
conn.execute("DROP TABLE sender_signatures__old")
except Exception as _mig_e:
import logging as _lg
_lg.getLogger(__name__).warning(f"sender_signatures owner-migration skipped: {_mig_e}")
def attachment_extract_dir(folder: str, uid: str) -> Path:
"""Containment-safe extraction directory for an attachment.
@@ -559,20 +609,10 @@ def _init_scheduled_db():
conn.execute("ALTER TABLE email_boundaries ADD COLUMN turns_json TEXT")
except Exception:
pass
# Per-sender signature cache. Populated by `learn_sender_signatures`
# action: the LLM extracts the common trailing block across N emails
# from each sender; the renderer folds it consistently for every
# future email from that address.
conn.execute("""
CREATE TABLE IF NOT EXISTS sender_signatures (
from_address TEXT PRIMARY KEY,
signature_text TEXT,
sample_count INTEGER,
last_built_at TEXT NOT NULL,
model_used TEXT,
source TEXT
)
""")
# Per-sender signature cache. Populated by `learn_sender_signatures`.
# Message sender addresses are global, so signatures must be scoped to the
# mailbox owner before `/read` returns them to the renderer.
_ensure_sender_signatures_table(conn)
conn.commit()
conn.close()
@@ -762,10 +802,14 @@ def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int
imaplib._MAXLINE = 50_000_000
return conn
def _imap_connect(account_id: str | None = None, owner: str = ""):
def _imap_connect(account_id: str | None = None, owner: str = "",
timeout: int = _IMAP_TIMEOUT_SECONDS):
# SECURITY: passing `owner` scopes the fallback config lookup so a brand
# new user doesn't get connected against another user's default mailbox
# when they have no account configured.
#
# `timeout` is overridable so short-lived callers (e.g. the service-health
# probe) can impose a tighter budget than the default IMAP timeout.
cfg = _get_email_config(account_id, owner=owner)
# Connection mode:
# STARTTLS on → plain + upgrade
@@ -778,7 +822,7 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
cfg["imap_host"],
cfg["imap_port"],
starttls=bool(cfg.get("imap_starttls")),
timeout=_IMAP_TIMEOUT_SECONDS,
timeout=timeout,
)
try:
conn.login(cfg["imap_user"], cfg["imap_password"])
+59 -26
View File
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
return m.group(1).decode() if m else ""
_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")
def _group_uid_fetch_records(msg_data) -> list:
"""Group an imaplib UID FETCH response into per-message (meta, payload).
imaplib yields an interleaved list: ``(meta, literal)`` tuples for
attributes that carry a literal (``RFC822.HEADER {n}`` etc.) plus bare
``bytes`` elements for everything the server sends outside a literal.
Where each attribute lands is server-specific: Dovecot sends FLAGS
*before* the header literal (so it ends up inside the tuple meta), while
Gmail sends FLAGS *after* it, arriving as a bare ``b' FLAGS (\\Seen))'``
element. Dropping bare elements therefore silently loses FLAGS on Gmail
and every message renders as unread/unflagged.
A tuple whose meta starts with a sequence number opens a new record;
every other part continuation tuple or bare bytes is folded into the
current record's meta so attribute regexes see the full meta text.
Plain ``b')'`` terminators get folded in too, which is harmless.
"""
grouped: list = [] # list of (meta_bytes, payload_bytes_or_None)
for part in (msg_data or []):
if isinstance(part, tuple):
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
if _FETCH_SEQ_RE.match(meta_b):
grouped.append((meta_b, part[1]))
elif grouped:
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
elif isinstance(part, (bytes, bytearray)) and grouped:
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + bytes(part), cur_payload)
return grouped
def _smtp_ready(cfg: dict) -> bool:
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
@@ -799,20 +834,11 @@ def setup_email_routes():
except Exception as e:
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
status, msg_data = "NO", []
# imaplib batch responses interleave (meta, payload) tuples and
# `b')'` terminators. Group by message: each tuple where the
# meta begins with a seq number starts a new message record.
seq_re = re.compile(rb'^(\d+)\s+\(')
grouped = [] # list of (meta_str, payload_bytes)
for part in (msg_data or []):
if isinstance(part, tuple):
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
if seq_re.match(meta_b):
grouped.append((meta_b, part[1]))
elif grouped:
# continuation of previous message — concatenate meta info if any
cur_meta, cur_payload = grouped[-1]
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
# Group the batched response into per-message (meta, payload)
# records. Bare bytes parts must be kept: Gmail returns FLAGS
# after the header literal as a bare element, and dropping it
# rendered every Gmail message as unread/unflagged.
grouped = _group_uid_fetch_records(msg_data)
if status != "OK" and not grouped:
conn.logout()
@@ -1061,7 +1087,10 @@ def setup_email_routes():
return {"contacts": [], "error": "Mail operation failed"}
@router.get("/search")
async def search_emails(
# Sync def: the body is blocking IMAP I/O with no awaits. As `async def` it ran
# directly on the event loop and stalled the whole app during a search; as a sync
# def FastAPI runs it in a threadpool, keeping the loop responsive.
def search_emails(
q: str = Query(""),
folder: str = Query("INBOX"),
limit: int = Query(50),
@@ -1123,14 +1152,15 @@ def setup_email_routes():
continue
raw_header = None
flags = ""
for part in msg_data:
if isinstance(part, tuple):
meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
raw_header = part[1]
flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
if flag_match:
flags = flag_match.group(1)
# Same Gmail caveat as the list route: FLAGS may
# arrive after the header literal, so group bare
# parts back into the message meta before scanning.
for meta_b, payload in _group_uid_fetch_records(msg_data):
if payload and b"RFC822.HEADER" in meta_b:
raw_header = payload
flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
if flag_match:
flags = flag_match.group(1).decode(errors="replace")
if not raw_header:
continue
msg = email_mod.message_from_bytes(raw_header)
@@ -1279,8 +1309,9 @@ def setup_email_routes():
try:
if sender_addr:
_rs = _c.execute(
"SELECT signature_text FROM sender_signatures WHERE from_address = ?",
(sender_addr.lower().strip(),),
f"SELECT signature_text FROM sender_signatures "
f"WHERE from_address = ? AND {owner_clause}",
(sender_addr.lower().strip(), *owner_params),
).fetchone()
if _rs and _rs[0]:
cached_sender_sig = _rs[0]
@@ -1756,7 +1787,9 @@ def setup_email_routes():
return {"success": False, "error": "Mail operation failed"}
@router.post("/archive/{uid}")
async def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
# Sync def: blocking IMAP I/O with no awaits — see search_emails above. Runs in a
# threadpool instead of blocking the event loop.
def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
"""Move email to Archive folder."""
try:
with _imap(account_id, owner=owner) as conn:
+9 -9
View File
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional
from pydantic import BaseModel
from core.database import GalleryImage
from src.auth_helpers import _auth_disabled
logger = logging.getLogger(__name__)
@@ -120,19 +121,18 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any
}
def _owner_filter(q, user):
def _owner_filter(q, user, model_cls=GalleryImage):
"""Apply owner filtering to a gallery query.
When auth is disabled (single-user mode) get_current_user returns None
and there is no per-user scoping. The main library list and stats already
treat None as "show everything" (`if user is not None`), so this helper
must too otherwise the tag/model filter sidebars come back empty and the
tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
silently affect zero rows in the most common self-hosted deployment.
``get_current_user`` returns None both in auth-disabled single-user mode
and when auth is enabled but no current user was resolved. Preserve the
single-user behavior, but fail closed for auth-enabled null-user states.
"""
if user is None:
if user is not None:
return q.filter(model_cls.owner == user)
if _auth_disabled():
return q
return q.filter(GalleryImage.owner == user)
return q.filter(False)
+58 -30
View File
@@ -19,6 +19,7 @@ from src.upload_limits import (
GALLERY_TRANSFORM_UPLOAD_MAX_BYTES,
)
from src.constants import GENERATED_IMAGES_DIR
from src.optional_deps import patch_realesrgan_torchvision_compat
from routes.gallery_helpers import (
GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
@@ -108,6 +109,32 @@ def _visible_image_endpoint_for_base(db, base: str, owner: str | None):
return fallback
async def _fetch_result_image_b64(url: str) -> Optional[str]:
"""Fetch an image URL returned in an upstream response body, base64-encoded
(or None on a non-200).
The URL comes from the diffusion/OpenAI server's response, not from our own
config, so a malicious or compromised endpoint could otherwise steer this
fetch at an internal or cloud-metadata address. Validate it the same way the
client-supplied endpoint is validated before the first request.
"""
import base64
import httpx
from src.url_safety import check_outbound_url
ok, reason = check_outbound_url(
url,
block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
)
if not ok:
raise HTTPException(502, f"Upstream returned an unsafe image URL: {reason}")
async with httpx.AsyncClient(timeout=60) as c2:
ir = await c2.get(url)
if ir.status_code == 200:
return base64.b64encode(ir.content).decode()
return None
def setup_gallery_routes() -> APIRouter:
router = APIRouter(tags=["gallery"])
@@ -476,8 +503,7 @@ def setup_gallery_routes() -> APIRouter:
.outerjoin(DbSession, GalleryImage.session_id == DbSession.id)
.filter(GalleryImage.is_active == True)
)
if user is not None:
q = q.filter(GalleryImage.owner == user)
q = _owner_filter(q, user)
# Search filter (prompt + tags + ai_tags)
if search:
@@ -579,28 +605,26 @@ def setup_gallery_routes() -> APIRouter:
db = SessionLocal()
try:
q = db.query(GalleryAlbum)
if user:
q = q.filter(GalleryAlbum.owner == user)
q = _owner_filter(q, user, GalleryAlbum)
albums = q.order_by(GalleryAlbum.created_at.desc()).all()
result = []
for a in albums:
_count_q = db.query(GalleryImage).filter(
GalleryImage.album_id == a.id, GalleryImage.is_active == True
)
if user:
_count_q = _count_q.filter(GalleryImage.owner == user)
_count_q = _owner_filter(_count_q, user)
count = _count_q.count()
cover_url = None
if a.cover_id:
cover = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id).first()
cover_q = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id)
cover = _owner_filter(cover_q, user).first()
if cover:
cover_url = f"/api/generated-image/{cover.filename}"
elif count > 0:
_cover_q = db.query(GalleryImage).filter(
GalleryImage.album_id == a.id, GalleryImage.is_active == True
)
if user:
_cover_q = _cover_q.filter(GalleryImage.owner == user)
_cover_q = _owner_filter(_cover_q, user)
first = _cover_q.order_by(GalleryImage.created_at.desc()).first()
if first:
cover_url = f"/api/generated-image/{first.filename}"
@@ -643,10 +667,9 @@ def setup_gallery_routes() -> APIRouter:
base = db.query(GalleryImage).filter(GalleryImage.is_active == True)
size_q = db.query(func.sum(GalleryImage.file_size)).filter(GalleryImage.is_active == True)
album_q = db.query(GalleryAlbum)
if user:
base = base.filter(GalleryImage.owner == user)
size_q = size_q.filter(GalleryImage.owner == user)
album_q = album_q.filter(GalleryAlbum.owner == user)
base = _owner_filter(base, user)
size_q = _owner_filter(size_q, user)
album_q = _owner_filter(album_q, user, GalleryAlbum)
total = base.count()
total_size = size_q.scalar() or 0
fav_count = base.filter(GalleryImage.favorite == True).count()
@@ -674,8 +697,7 @@ def setup_gallery_routes() -> APIRouter:
GalleryImage.is_active == True,
(GalleryImage.ai_tags == None) | (GalleryImage.ai_tags == ""),
)
if user:
q = q.filter(GalleryImage.owner == user)
q = _owner_filter(q, user)
if album_id:
q = q.filter(GalleryImage.album_id == album_id)
untagged = q.count()
@@ -909,15 +931,23 @@ def setup_gallery_routes() -> APIRouter:
raise HTTPException(404, "Image not found")
img_filename = img.filename
# Remove the file from disk
img_path = _gallery_image_path(img_filename)
if img_path.exists():
img_path.unlink()
# Soft-delete the record
# Soft-delete the record first; the DB is the source of truth.
img.is_active = False
db.commit()
# Only after the soft-delete commit succeeds do we remove the file.
# If the file were deleted first and the commit then failed/rolled
# back, the still-active record would point at a missing file.
# Best-effort so a missing or locked file can't 500 a delete that
# already succeeded logically. Uses the path-confined resolver so a
# malformed stored filename can't escape generated_images.
try:
img_path = _gallery_image_path(img_filename)
if img_path.exists():
img_path.unlink()
except Exception as e:
logger.warning(f"Could not remove gallery image file for {img_filename}: {e}")
# Strip stale chat-history references so the image bubble
# (and its prompt caption) doesn't come back after a server
# reboot replays the session. We remove the matching tool
@@ -1147,10 +1177,7 @@ def setup_gallery_routes() -> APIRouter:
if item.get("b64_json"):
raw_b64 = item["b64_json"]
elif item.get("url"):
async with httpx.AsyncClient(timeout=60) as c2:
img_r = await c2.get(item["url"])
if img_r.status_code == 200:
raw_b64 = base64.b64encode(img_r.content).decode()
raw_b64 = await _fetch_result_image_b64(item["url"])
if not raw_b64:
raise HTTPException(502, "OpenAI returned no image")
@@ -1211,7 +1238,7 @@ def setup_gallery_routes() -> APIRouter:
original and regenerates `strength` fraction. With strength ~0.4
you get edge blending + lighting unification while keeping the
composition recognisable."""
import httpx, base64 as _b64
import httpx
user = require_privilege(request, "can_generate_images")
body = await request.json()
@@ -1387,10 +1414,9 @@ def setup_gallery_routes() -> APIRouter:
if item.get("b64_json"):
return {"image": item["b64_json"]}
if item.get("url"):
async with httpx.AsyncClient(timeout=60) as c2:
ir = await c2.get(item["url"])
if ir.status_code == 200:
return {"image": _b64.b64encode(ir.content).decode()}
img_b64 = await _fetch_result_image_b64(item["url"])
if img_b64:
return {"image": img_b64}
last_err = f"{path}: server returned no image"
except httpx.ConnectError as e:
raise HTTPException(502, f"Can't reach diffusion server at {base}: {e}")
@@ -1450,6 +1476,7 @@ def setup_gallery_routes() -> APIRouter:
img_bytes = base64.b64decode(image_b64)
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
try:
patch_realesrgan_torchvision_compat()
from realesrgan import RealESRGANer
except ImportError:
return {"error": "realesrgan not installed. Install it from Cookbook → Dependencies (search 'realesrgan')."}
@@ -1499,6 +1526,7 @@ def setup_gallery_routes() -> APIRouter:
img_bytes = base64.b64decode(image_b64)
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
try:
patch_realesrgan_torchvision_compat()
from basicsr.archs.rrdbnet_arch import RRDBNet
from realesrgan import RealESRGANer
except ImportError:
+23 -3
View File
@@ -1,7 +1,9 @@
import re
from copy import deepcopy
from fastapi import APIRouter
from fastapi import APIRouter, HTTPException
from routes._validators import validate_remote_host, validate_ssh_port
# Backends the manual hardware simulator accepts. Must stay a subset of what
@@ -11,6 +13,14 @@ from fastapi import APIRouter
_MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
def _validate_detection_target(host: str = "", ssh_port: str = "") -> tuple[str, str]:
host_value = validate_remote_host(host) or ""
port_value = validate_ssh_port(ssh_port) or ""
if port_value and not host_value:
raise HTTPException(400, "ssh_port requires host")
return host_value, port_value
def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
"""Manual hardware is a "what if I had this setup" simulator —
REPLACES the detected hardware entirely instead of adding to it.
@@ -105,6 +115,7 @@ def setup_hwfit_routes():
"""Detect and return current system hardware info. Pass host=user@server for remote.
fresh=true bypasses the per-host cache (the Rescan button)."""
from services.hwfit.hardware import detect_system
host, ssh_port = _validate_detection_target(host, ssh_port)
return detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
@router.get("/models")
@@ -118,6 +129,7 @@ def setup_hwfit_routes():
from services.hwfit.hardware import detect_system
from services.hwfit.fit import rank_models
from services.hwfit.models import get_models, model_catalog_path
host, ssh_port = _validate_detection_target(host, ssh_port)
system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
if system.get("error"):
return {"system": system, "models": [], "error": system["error"]}
@@ -165,8 +177,14 @@ def setup_hwfit_routes():
system["gpu_name"] = g["name"]
system["active_group"] = {**g, "use_count": n}
if gpu_count != "":
n = int(gpu_count)
# Parse the optional count defensively (matches the gpu_group guard
# above): a non-numeric query param previously raised ValueError ->
# HTTP 500. A malformed value is ignored, same as omitting it.
try:
n = int(gpu_count) if gpu_count != "" else None
except ValueError:
n = None
if n is not None:
if n == 0:
# RAM-only mode: rank against system memory, offload allowed.
system["has_gpu"] = False
@@ -229,6 +247,7 @@ def setup_hwfit_routes():
from services.hwfit.hardware import detect_system
from services.hwfit.models import get_models
from services.hwfit.profiles import compute_serve_profiles
host, ssh_port = _validate_detection_target(host, ssh_port)
system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
if system.get("error"):
return {"system": system, "profiles": [], "error": system["error"]}
@@ -279,6 +298,7 @@ def setup_hwfit_routes():
"""Rank image generation models against detected hardware."""
from services.hwfit.hardware import detect_system
from services.hwfit.image_models import rank_image_models
host, ssh_port = _validate_detection_target(host, ssh_port)
system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
if system.get("error"):
return {"system": system, "models": [], "error": system["error"]}
+18 -6
View File
@@ -108,6 +108,12 @@ def _load_disabled_map():
db.close()
def _mcp_oauth_redirect_uri() -> str:
"""Shared callback URL for legacy Google and generic MCP OAuth flows."""
from src.mcp_oauth import REDIRECT_URI
return REDIRECT_URI
def setup_mcp_routes(mcp_manager: McpManager):
"""Setup MCP routes with the provided manager."""
@@ -445,9 +451,9 @@ def setup_mcp_routes(mcp_manager: McpManager):
client_id = keys["client_id"]
scopes = oauth_cfg.get("scopes", [])
# For Desktop App creds, redirect to localhost — the user will
# For Desktop App creds, default to localhost — the user will
# paste the resulting URL back if they're on a different device.
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
redirect_uri = _mcp_oauth_redirect_uri()
params = {
"client_id": client_id,
@@ -469,7 +475,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
return RedirectResponse(auth_url)
else:
# Remote device — show paste-back page
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host))
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host, redirect_uri))
finally:
db.close()
@@ -536,7 +542,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
client_id = keys["client_id"]
client_secret = keys["client_secret"]
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
redirect_uri = _mcp_oauth_redirect_uri()
async with httpx.AsyncClient() as client:
resp = await client.post(
@@ -603,13 +609,19 @@ def setup_mcp_routes(mcp_manager: McpManager):
return router
def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
def _oauth_authorize_page(
auth_url: str,
server_id: str,
host: str,
redirect_uri: str = "http://localhost:7000/api/mcp/oauth/callback",
) -> str:
"""Page with Google sign-in link and URL paste-back form for remote access."""
# Escape values interpolated into the page: `host` comes from the request
# Host header and `server_id` from the OAuth state — neither is trusted.
auth_url = html.escape(auth_url, quote=True)
server_id = html.escape(server_id, quote=True)
host = html.escape(host, quote=True)
redirect_uri = html.escape(redirect_uri, quote=True)
return f"""<!DOCTYPE html>
<html><head>
<meta charset="UTF-8"><title>Authorize Odysseus</title>
@@ -654,7 +666,7 @@ def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
<div class="divider"></div>
<form method="POST" action="http://{host}/api/mcp/oauth/exchange/{server_id}">
<p>Paste the URL from your browser after signing in:</p>
<input type="text" name="callback_url" placeholder="http://localhost:7000/api/mcp/oauth/callback?code=..." required>
<input type="text" name="callback_url" placeholder="{redirect_uri}?code=..." required>
<br><button type="submit">Connect</button>
</form>
</div></body></html>"""
+75 -45
View File
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
from services.memory.memory_extractor import audit_memories
from src.auth_helpers import get_current_user, require_user
from src.endpoint_resolver import resolve_endpoint
from src.task_endpoint import resolve_task_endpoint
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
logger = logging.getLogger(__name__)
@@ -105,6 +106,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
if memory_manager.find_duplicates(text, user_mem):
return {"ok": True, "count": len(user_mem), "message": "Memory already exists"}
if memory_data.session_id:
try:
session_obj = session_manager.get_session(memory_data.session_id)
except KeyError:
raise HTTPException(404, "Session not found")
_assert_session_owner(session_obj, user)
new_entry = memory_manager.add_entry(text, memory_data.source, memory_data.category, owner=user)
if memory_data.session_id:
new_entry["session_id"] = memory_data.session_id
@@ -163,8 +171,17 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
session_id = memory.get("session_id")
if session_id and session_id in session_manager.sessions:
session = session_manager.get_session(session_id)
memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
try:
session = session_manager.get_session(session_id)
if session:
_assert_session_owner(session, user)
memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
except KeyError:
memory["session_name"] = "Unknown"
except HTTPException as exc:
if exc.status_code != 404:
raise
memory["session_name"] = "Unknown"
else:
memory["session_name"] = "Unknown"
@@ -224,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
}
messages = [system_msg] + sess.get_context_messages()
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
)
try:
suggestion_text = await llm_call_async(
sess.endpoint_url,
sess.model,
t_url,
t_model,
messages,
temperature=0.2,
max_tokens=500,
headers=sess.headers,
headers=t_headers,
)
try:
suggestions = json.loads(suggestion_text)
@@ -262,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
endpoint_url = model = None
headers = {}
# Try default model from settings first
settings = _load_settings()
ep_id = settings.get("default_endpoint_id", "")
default_model = settings.get("default_model", "")
if ep_id:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
).first()
if ep:
base = _normalize_base(ep.base_url)
endpoint_url = build_chat_url(base)
model = default_model
if not model and ep.models:
try:
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
if models:
model = models[0]
except Exception:
pass
if ep.api_key:
headers = {"Authorization": f"Bearer {ep.api_key}"}
finally:
db.close()
# Try utility model from settings first — memory audit is a background
# task and should prefer the lighter utility model over the main chat model.
from src.task_endpoint import resolve_task_endpoint
user = _owner(request)
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
if t_url and t_model:
endpoint_url, model, headers = t_url, t_model, t_headers
else:
# Fall back to default model if no task/utility model configured
settings = _load_settings()
ep_id = settings.get("default_endpoint_id", "")
default_model = settings.get("default_model", "")
if ep_id:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
).first()
if ep:
base = _normalize_base(ep.base_url)
endpoint_url = build_chat_url(base)
model = default_model
if not model and ep.models:
try:
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
if models:
model = models[0]
except Exception:
pass
if ep.api_key:
headers = {"Authorization": f"Bearer {ep.api_key}"}
finally:
db.close()
# Fall back to session model if no default configured
if not endpoint_url and session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
except KeyError:
pass
# Fall back to session model if no default configured
if not endpoint_url and session:
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
except KeyError:
pass
if not endpoint_url or not model:
raise HTTPException(400, "No default model configured — set one in Settings")
@@ -344,13 +373,14 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
try:
sess = session_manager.get_session(session)
_assert_session_owner(sess, _owner(request))
endpoint_url = sess.endpoint_url
model = sess.model
headers = sess.headers
endpoint_url, model, headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
)
except KeyError:
raise HTTPException(404, "Session not found — needed for LLM config")
logger.warning("Session %s not found, falling back to utility endpoint", session)
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
else:
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
if not endpoint_url or not model:
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
+90 -12
View File
@@ -123,6 +123,21 @@ def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
return cleared_users
def _default_endpoint_needs_assignment(current_default_id: str, enabled_endpoint_ids) -> bool:
"""Whether the global default chat endpoint should be (re)assigned.
True when nothing is configured yet, or the configured default no longer
resolves to an enabled endpoint (e.g. the user disabled it). Without the
second case, adding a new endpoint after disabling the previous default
leaves `default_endpoint_id` pointing at the disabled endpoint, so features
that read the raw setting (Memory Tidy) fail with "No default model
configured" even though an enabled endpoint exists. See #3586.
"""
if not current_default_id:
return True
return current_default_id not in enabled_endpoint_ids
# Loopback hosts a user might type for a local model server (LM Studio,
# llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
# host the server actually runs on.
@@ -233,6 +248,9 @@ _PROVIDER_CURATED = {
"zai-coding": [
"glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
],
"kimi-code": [
"kimi-for-coding",
],
"deepseek": [
"deepseek-chat", "deepseek-reasoner",
],
@@ -283,6 +301,7 @@ _HOST_TO_CURATED = (
("fireworks.ai", "fireworks"),
("googleapis.com", "google"),
("x.ai", "xai"),
("nvidia.com", "nvidia"),
("openrouter.ai", "openrouter"),
("ollama.com", "ollama"),
)
@@ -299,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
parsed = urlparse(base_url)
if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
return "zai-coding"
if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
return "kimi-code"
for domain, key in _HOST_TO_CURATED:
if _host_match(base_url, domain):
return key
@@ -477,10 +498,17 @@ _NON_CHAT_PREFIXES = (
"dall-e", "tts-", "whisper", "text-embedding", "embedding",
"davinci", "babbage", "moderation", "omni-moderation",
"sora", "gpt-image", "chatgpt-image",
# embedding / retrieval / non-chat models (common across providers)
"snowflake/arctic-embed", "nvidia/nv-embed", "embed",
)
_NON_CHAT_CONTAINS = (
"-realtime", "-transcribe", "-tts", "-codex",
"codex-",
"codex-", "content-safety", "-safety", "-reward", "nvclip",
"kosmos", "fuyu", "deplot", "vila", "neva",
"gliner", "riva", "-parse", "-embedqa", "-nemoretriever",
"topic-control", "calibration",
"ai-synthetic-video", "cosmos-reason2",
"bge", "llama-guard",
)
_NON_CHAT_EXACT_PREFIXES = (
"gpt-audio", # gpt-audio, gpt-audio-mini etc. (not gpt-4o-audio-preview which is chat)
@@ -680,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
"""Probe a base URL's /models endpoint and return list of model IDs.
For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
from src.endpoint_resolver import resolve_url
from src.llm_core import httpx_get_kimi_aware
base = resolve_url(_normalize_base(base_url))
provider = _safe_detect_provider(base)
if provider == "chatgpt-subscription":
@@ -715,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
url = _safe_build_models_url(base)
headers = _safe_build_headers(api_key, base)
try:
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
r.raise_for_status()
data = r.json()
# OpenAI format: {"data": [{"id": "model-name"}]}
@@ -731,7 +760,12 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
for _e in _PROVIDER_CURATED.get(_ck, []):
if _e not in set(models) and not any(m.startswith(_e) for m in models):
models.append(_e)
return models
if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
_ck = _match_provider_curated(base, None)
for _e in _PROVIDER_CURATED.get(_ck, []):
if _e not in set(models) and not any(m.startswith(_e) for m in models):
models.append(_e)
return [m for m in models if _is_chat_model(m)]
except httpx.HTTPStatusError as e:
if api_key:
status = e.response.status_code if e.response is not None else "unknown"
@@ -755,7 +789,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
data = r.json()
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
if models:
return models
return [m for m in models if _is_chat_model(m)]
except Exception as e:
logger.debug(f"Ollama /api/tags probe failed for {base}: {e}")
# Fall back to curated list if the provider has a URL-based match (e.g. z.ai has no /models endpoint)
@@ -847,15 +881,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
"""Return a provider-aware error message for failed endpoint probes."""
"""Return a provider-aware error message for failed endpoint probes.
Surfaces the URL we actually probed and, when the endpoint looks like
LM Studio (port 1234 or hostname match), adds a hint about loading a
model and confirming the Developer Server is running. The user previously
saw a generic "No models found for that provider/key" with no way to
tell whether the URL was wrong, the server was down, or the server was
reachable but had no model loaded (issue #25).
"""
ping = ping or {}
error = ping.get("error")
from src.endpoint_resolver import build_models_url
try:
probed = build_models_url(base_url) or base_url
except Exception:
probed = base_url
parsed = urlparse(base_url)
host = (parsed.hostname or "").lower()
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
is_lmstudio = (
parsed.port == 1234
or "lmstudio" in host
or "lm-studio" in host
or "lm_studio" in host
)
if is_lmstudio:
parts = [
"LM Studio is reachable, but no models were reported.",
f"Probed {probed}.",
]
if error:
parts.append(f"Last probe error: {error}.")
parts.append(
"Open LM Studio, load at least one model, and confirm the "
"Developer Server is running on port 1234."
)
parts.append(
"Base URL should be http://localhost:1234/v1 (native) or "
"http://host.docker.internal:1234/v1 (Docker)."
)
return " ".join(parts)
if is_ollama:
parts = ["No Ollama models found for that endpoint."]
parts.append(f"Probed {probed}.")
if error:
parts.append(f"Last probe error: {error}.")
parts.append("Check that Ollama is running and that the base URL is correct.")
@@ -865,9 +936,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
return " ".join(parts)
if error:
return f"No models found for that provider/key. Last probe error: {error}."
return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
return "No models found for that provider/key."
return f"No models found for that provider/key. Probed {probed}."
def _normalize_model_ids(value):
@@ -1719,12 +1790,19 @@ def setup_model_routes(model_discovery):
)
db.add(ep)
db.commit()
# Auto-set as default chat endpoint if none configured yet. Seed
# the first CHAT model (not raw model_ids[0]) so we don't pin the
# global default to an embedding/tts/etc. entry a provider happens
# to list first.
# Auto-set as default chat endpoint when none is usable yet — either
# nothing is configured, or the configured default points at an
# endpoint that is now missing/disabled (#3586). Seed the first CHAT
# model (not raw model_ids[0]) so we don't pin the global default to
# an embedding/tts/etc. entry a provider happens to list first.
settings = _load_settings()
if not settings.get("default_endpoint_id"):
enabled_ids = {
e.id
for e in db.query(ModelEndpoint).filter(
ModelEndpoint.is_enabled == True # noqa: E712
).all()
}
if _default_endpoint_needs_assignment(settings.get("default_endpoint_id") or "", enabled_ids):
from src.endpoint_resolver import _first_chat_model
settings["default_endpoint_id"] = ep.id
settings["default_model"] = _first_chat_model(model_ids) or ""
+5 -2
View File
@@ -160,8 +160,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
JSON response confirming removal
"""
try:
if not directory:
raise HTTPException(400, "Directory path is required")
# Confine to PERSONAL_DIR — parity with add_directory_to_rag (which
# resolves the path the same way). Without this, an arbitrary or
# `..`-escaping path is passed straight to
# personal_docs_manager.remove_directory / rag.remove_directory.
directory = _resolve_allowed_personal_dir(directory)
logger.info(f"Removing directory from RAG: {directory}")
+14 -10
View File
@@ -11,7 +11,7 @@ from core.session_manager import SessionManager
from core.models import ChatMessage
from src.request_models import SessionResponse
from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
from src.auth_helpers import get_current_user, effective_user, _auth_disabled
from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
from src.session_actions import is_session_recently_active
@@ -258,7 +258,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
last_msg_map = {}
mode_map = {}
msg_count_map = {}
rows = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False, DbSession.owner == user).all()
q = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False)
q = owner_filter(q, DbSession, user)
rows = q.all()
for row in rows:
folder_map[row.id] = row.folder
token_map[row.id] = (row.total_input_tokens or 0) + (row.total_output_tokens or 0)
@@ -277,17 +279,19 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
# Sessions with active documents that have content
from sqlalchemy import func
doc_session_ids = set(
r[0] for r in db.query(Document.session_id)
.filter(Document.is_active == True,
Document.current_content != None,
func.trim(Document.current_content) != "",
Document.owner == user)
r[0] for r in owner_filter(
db.query(Document.session_id)
.filter(Document.is_active == True,
Document.current_content != None,
func.trim(Document.current_content) != ""),
Document, user)
.distinct().all()
)
img_session_ids = set(
r[0] for r in db.query(GalleryImage.session_id)
.filter(GalleryImage.session_id != None,
GalleryImage.owner == user)
r[0] for r in owner_filter(
db.query(GalleryImage.session_id)
.filter(GalleryImage.session_id != None),
GalleryImage, user)
.distinct().all()
)
finally:
+16 -2
View File
@@ -1,6 +1,7 @@
"""Shell routes — user-facing command execution endpoint."""
import asyncio
import importlib
import json
import logging
import os
@@ -14,6 +15,7 @@ from collections import namedtuple
from pathlib import Path
from typing import Dict, Any
from core.platform_compat import IS_APPLE_SILICON, which_tool
from src.optional_deps import prepare_optional_dependency_import
# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
# on Windows, so importing them unconditionally crashed app startup there
@@ -149,6 +151,11 @@ def _pip_dist_name(pkg: dict) -> str:
return (pkg.get("name") or "").replace("_", "-")
def _import_optional_dependency_for_status(name: str):
prepare_optional_dependency_import(name)
return importlib.import_module(name)
def _package_installed_from_probe(name: str, probe: dict) -> bool:
"""Return whether an optional dependency is usable by Cookbook.
@@ -970,7 +977,6 @@ def setup_shell_routes() -> APIRouter:
"""
_require_admin(request)
_reject_cross_site(request)
import importlib
import importlib.metadata as importlib_metadata
import shlex
import json as _json
@@ -1057,6 +1063,13 @@ def setup_shell_routes() -> APIRouter:
"category": "Image",
"target": "remote",
},
{
"name": "transformers",
"pip": "transformers",
"desc": "Hugging Face model components used by SD/Flux pipelines and image tools",
"category": "Image",
"target": "remote",
},
{
"name": "rembg",
"pip": "rembg[gpu]",
@@ -1202,7 +1215,7 @@ def setup_shell_routes() -> APIRouter:
pkg["status_note"] = _package_status_note("vllm", probe)
else:
try:
importlib.import_module(pkg["name"])
_import_optional_dependency_for_status(pkg["name"])
importlib_metadata.version(_pip_dist_name(pkg))
pkg["installed"] = True
except ImportError:
@@ -1251,6 +1264,7 @@ def setup_shell_routes() -> APIRouter:
"sglang[all]",
"diffusers",
"diffusers[torch]",
"transformers",
"TTS",
"bark",
"faster-whisper",
+4
View File
@@ -198,6 +198,8 @@ def setup_webhook_routes(
"opencode-go": "https://opencode.ai/zen/go/v1",
"fireworks": "https://api.fireworks.ai/inference/v1",
"venice": "https://api.venice.ai/api/v1",
"kimi-code": "https://api.kimi.com/coding/v1",
"kimicode": "https://api.kimi.com/coding/v1",
}
# Model prefix → provider mapping for auto-detection
@@ -210,6 +212,8 @@ def setup_webhook_routes(
"mistral": "mistral",
"llama": "groq",
"mixtral": "groq",
"kimi-for-coding": "kimi-code",
"kimi": "kimi-code",
}
def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]:
+85
View File
@@ -0,0 +1,85 @@
"""Workspace API - browse server directories to pick a tool workspace folder."""
import os
from fastapi import APIRouter, Request, HTTPException, Query
from src.auth_helpers import get_current_user
from src.tool_security import owner_is_admin_or_single_user
# Cap entries returned per directory (mirrors filesystem_tools._CODENAV_MAX_HITS).
# A huge directory shouldn't dump thousands of rows into the picker; the user can
# type/paste a path to jump straight in instead.
_MAX_BROWSE_DIRS = 500
def setup_workspace_routes():
router = APIRouter(prefix="/api/workspace", tags=["workspace"])
@router.get("/browse")
def browse(request: Request, path: str = Query(default="")):
"""List subdirectories of `path` (default: home) so the UI can navigate
the server filesystem and pick a workspace folder. Directories only.
ADMIN-ONLY: this enumerates the server filesystem, so it is gated the
same way the file/shell tools are (read_file/write_file/bash are in
NON_ADMIN_BLOCKED_TOOLS). A non-admin who can't use those tools must not
be able to map the host's directory tree either.
"""
owner = get_current_user(request)
if not owner_is_admin_or_single_user(owner):
raise HTTPException(status_code=403, detail="Workspace browsing is admin-only")
# Resolve symlinks so the reported path is canonical and the UI navigates
# real directories (defends against symlink games in displayed paths).
target = os.path.realpath(os.path.expanduser(path.strip() or "~"))
if not os.path.isdir(target):
target = os.path.realpath(os.path.expanduser("~"))
dirs = []
try:
with os.scandir(target) as it:
for entry in it:
try:
# Don't follow symlinks when classifying - a symlinked
# dir is skipped rather than letting the browser wander
# off via a link. Hidden entries are omitted.
if entry.is_dir(follow_symlinks=False) and not entry.name.startswith("."):
# Build the child path server-side with os.path.join
# so it's correct on Windows (backslashes) and Linux.
dirs.append({"name": entry.name, "path": os.path.join(target, entry.name)})
except OSError:
continue
except (PermissionError, OSError):
dirs = []
dirs_sorted = sorted(dirs, key=lambda d: d["name"].lower())
truncated = len(dirs_sorted) > _MAX_BROWSE_DIRS
parent = os.path.dirname(target)
from src.tool_execution import vet_workspace
return {
"path": target,
"parent": parent if parent and parent != target else None,
"dirs": dirs_sorted[:_MAX_BROWSE_DIRS],
"truncated": truncated,
# Whether this directory may be bound as a workspace (filesystem
# roots and sensitive dirs may be browsed through but not chosen).
"selectable": vet_workspace(target) is not None,
}
@router.get("/vet")
def vet(request: Request, path: str = Query(default="")):
"""Validate a workspace path without binding it.
The UI calls this before persisting a manually typed path (/workspace
set) so a typo, file path, deleted folder, sensitive dir, or filesystem
root is rejected up front with the canonical path returned on success,
instead of being stored client-side and silently dropped at chat time.
Admin-gated like /browse: it confirms path existence on the host.
"""
owner = get_current_user(request)
if not owner_is_admin_or_single_user(owner):
raise HTTPException(status_code=403, detail="Workspace selection is admin-only")
from src.tool_execution import vet_workspace
resolved = vet_workspace(path)
return {"ok": resolved is not None, "path": resolved}
return router
+635
View File
@@ -0,0 +1,635 @@
#!/usr/bin/env python3
"""Build a neutral agent migration manifest.
This helper is intentionally read-only. It does not import the Odysseus
application package, write to data/, call an LLM, or apply anything. It turns
common agent export shapes into a portable JSON manifest that Odysseus can
preview or import later.
"""
from __future__ import annotations
import argparse
import hashlib
import json
import mimetypes
import sys
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterable
SCHEMA_VERSION = "agent-migration.v1"
TEXT_EXTENSIONS = {
".cfg",
".conf",
".csv",
".json",
".log",
".md",
".markdown",
".py",
".rst",
".toml",
".txt",
".yaml",
".yml",
}
@dataclass(frozen=True)
class InputWarning:
path: str
message: str
def utc_now_iso() -> str:
return datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")
def sha256_text(text: str) -> str:
return hashlib.sha256(text.encode("utf-8")).hexdigest()
def sha256_bytes(data: bytes) -> str:
return hashlib.sha256(data).hexdigest()
def sha256_path(path: Path) -> str:
h = hashlib.sha256()
with path.open("rb") as f:
for chunk in iter(lambda: f.read(65536), b""):
h.update(chunk)
return h.hexdigest()
def stable_id(kind: str, source_name: str, *parts: Any) -> str:
raw = "\x1f".join([kind, source_name, *[str(part) for part in parts]])
return f"{kind}:{hashlib.sha256(raw.encode('utf-8')).hexdigest()[:16]}"
def read_json(path: Path) -> Any:
with path.open("r", encoding="utf-8") as handle:
return json.load(handle)
def normalize_category(value: Any) -> str:
category = str(value or "fact").strip().lower()
return category or "fact"
def normalize_memory_text(item: Any) -> str:
if isinstance(item, str):
return item.strip()
if isinstance(item, dict):
for key in ("text", "content", "memory", "value"):
value = item.get(key)
if isinstance(value, str) and value.strip():
return value.strip()
return ""
def memory_metadata(item: Any, source_path: Path, index: int) -> dict[str, Any]:
metadata: dict[str, Any] = {
"source_path": str(source_path),
"source_index": index,
}
if isinstance(item, dict):
for key in ("id", "timestamp", "created_at", "updated_at", "source", "tags", "pinned"):
if key in item:
metadata[f"source_{key}"] = item.get(key)
return metadata
def payload_items(payload: Any, keys: tuple[str, ...]) -> Any:
if isinstance(payload, dict):
for key in keys:
if isinstance(payload.get(key), list):
return payload[key]
return payload
def collect_memory_json(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
warnings: list[InputWarning] = []
try:
payload = read_json(path)
except Exception as exc:
return [], [InputWarning(str(path), f"could not read JSON: {exc}")]
payload = payload_items(payload, ("memories", "memory", "items", "data"))
if not isinstance(payload, list):
return [], [InputWarning(str(path), "expected a JSON list or an object containing a memory list")]
items: list[dict[str, Any]] = []
seen: set[str] = set()
for index, item in enumerate(payload):
text = normalize_memory_text(item)
if not text:
warnings.append(InputWarning(str(path), f"skipped memory at index {index}: missing text"))
continue
digest = sha256_text(text.strip().lower())
if digest in seen:
warnings.append(InputWarning(str(path), f"skipped duplicate memory at index {index}"))
continue
seen.add(digest)
category = normalize_category(item.get("category") if isinstance(item, dict) else "fact")
source = str(item.get("source") or source_name) if isinstance(item, dict) else source_name
items.append(
{
"id": stable_id("memory", source_name, path, index, digest),
"kind": "memory",
"text": text,
"category": category,
"source": source,
"metadata": memory_metadata(item, path, index),
}
)
return items, warnings
def normalize_timestamp(value: Any) -> str | None:
if value is None or value == "":
return None
if isinstance(value, (int, float)):
try:
return (
datetime.fromtimestamp(float(value), timezone.utc)
.replace(microsecond=0)
.isoformat()
.replace("+00:00", "Z")
)
except (OverflowError, OSError, ValueError):
return str(value)
return str(value)
def normalize_role(value: Any) -> str:
role = str(value or "unknown").strip().lower()
if role in {"human", "user"}:
return "user"
if role in {"assistant", "ai", "bot", "model"}:
return "assistant"
if role in {"system", "tool"}:
return role
return role or "unknown"
def content_part_text(part: Any) -> str:
if isinstance(part, str):
return part
if isinstance(part, dict):
for key in ("text", "content", "value"):
value = part.get(key)
if isinstance(value, str):
return value
if part.get("type") == "text" and isinstance(part.get("text"), str):
return part["text"]
return ""
def normalize_message_text(message: dict[str, Any]) -> str:
content = message.get("content")
if isinstance(content, str):
return content
if isinstance(content, list):
return "\n".join(text for text in (content_part_text(part).strip() for part in content) if text)
if isinstance(content, dict):
parts = content.get("parts")
if isinstance(parts, list):
return "\n".join(text for text in (content_part_text(part).strip() for part in parts) if text)
for key in ("text", "content", "value"):
value = content.get(key)
if isinstance(value, str):
return value
for key in ("text", "body", "message"):
value = message.get(key)
if isinstance(value, str):
return value
return ""
def normalize_message(message: dict[str, Any]) -> dict[str, Any] | None:
author = message.get("author") if isinstance(message.get("author"), dict) else {}
role = (
message.get("role")
or message.get("sender")
or message.get("speaker")
or author.get("role")
or author.get("name")
)
text = normalize_message_text(message).strip()
if not text:
return None
normalized: dict[str, Any] = {
"role": normalize_role(role),
"text": text,
}
timestamp = normalize_timestamp(message.get("created_at") or message.get("create_time") or message.get("timestamp"))
if timestamp:
normalized["created_at"] = timestamp
message_id = message.get("id")
if message_id is not None:
normalized["source_id"] = str(message_id)
return normalized
def chatgpt_mapping_messages(conversation: dict[str, Any]) -> list[dict[str, Any]]:
mapping = conversation.get("mapping")
if not isinstance(mapping, dict):
return []
rows: list[tuple[float, int, dict[str, Any]]] = []
for index, node in enumerate(mapping.values()):
if not isinstance(node, dict) or not isinstance(node.get("message"), dict):
continue
message = node["message"]
sort_value = message.get("create_time")
try:
sort_key = float(sort_value)
except (TypeError, ValueError):
sort_key = float(index)
normalized = normalize_message(message)
if normalized:
rows.append((sort_key, index, normalized))
return [row[2] for row in sorted(rows, key=lambda row: (row[0], row[1]))]
def conversation_messages(conversation: dict[str, Any]) -> tuple[list[dict[str, Any]], str]:
mapped = chatgpt_mapping_messages(conversation)
if mapped:
return mapped, "chatgpt_mapping"
for key in ("messages", "chat_messages", "turns"):
raw_messages = conversation.get(key)
if isinstance(raw_messages, list):
messages = [
normalized
for raw in raw_messages
if isinstance(raw, dict)
for normalized in [normalize_message(raw)]
if normalized
]
return messages, key
return [], "unknown"
def conversation_title(conversation: dict[str, Any], index: int) -> str:
for key in ("title", "name", "summary"):
value = conversation.get(key)
if isinstance(value, str) and value.strip():
return value.strip()
return f"Conversation {index + 1}"
def collect_conversation_json(
path: Path,
source_name: str,
*,
include_content: bool = False,
max_messages: int = 2000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
warnings: list[InputWarning] = []
try:
payload = read_json(path)
except Exception as exc:
return [], [InputWarning(str(path), f"could not read JSON: {exc}")]
payload = payload_items(payload, ("conversations", "conversation", "items", "data"))
if isinstance(payload, dict):
payload = [payload]
if not isinstance(payload, list):
return [], [InputWarning(str(path), "expected a JSON list or an object containing a conversation list")]
items: list[dict[str, Any]] = []
for index, conversation in enumerate(payload):
if not isinstance(conversation, dict):
warnings.append(InputWarning(str(path), f"skipped conversation at index {index}: expected object"))
continue
messages, format_hint = conversation_messages(conversation)
if not messages:
warnings.append(InputWarning(str(path), f"skipped conversation at index {index}: no text messages found"))
continue
title = conversation_title(conversation, index)
source_id = conversation.get("id") or conversation.get("uuid") or conversation.get("conversation_id")
text_digest = sha256_text("\n".join(f"{msg['role']}:{msg['text']}" for msg in messages))
metadata: dict[str, Any] = {
"source_path": str(path),
"source_index": index,
"source_format": format_hint,
"message_count": len(messages),
"text_sha256": text_digest,
"content_included": False,
}
if source_id is not None:
metadata["source_id"] = str(source_id)
for key in ("create_time", "created_at", "update_time", "updated_at"):
timestamp = normalize_timestamp(conversation.get(key))
if timestamp:
metadata[f"source_{key}"] = timestamp
item: dict[str, Any] = {
"id": stable_id("conversation", source_name, path, source_id or index, text_digest),
"kind": "conversation_thread",
"title": title,
"source": source_name,
"metadata": metadata,
}
if include_content:
if len(messages) > max_messages:
warnings.append(
InputWarning(
str(path),
f"skipped conversation content at index {index}: over {max_messages} messages",
)
)
else:
item["messages"] = messages
item["metadata"]["content_included"] = True
items.append(item)
return items, warnings
def parse_skill_frontmatter(text: str) -> dict[str, Any]:
if not text.startswith("---"):
return {}
end = text.find("\n---", 3)
if end < 0:
return {}
frontmatter: dict[str, Any] = {}
for line in text[3:end].strip().splitlines():
if not line.strip() or line.lstrip().startswith("#") or ":" not in line:
continue
key, value = line.split(":", 1)
key = key.strip()
value = value.strip().strip('"').strip("'")
if key:
frontmatter[key] = value
return frontmatter
def collect_skill_dir(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
warnings: list[InputWarning] = []
if path.is_symlink():
return [], [InputWarning(str(path), "skills path is a symlink; skipped")]
if not path.exists():
return [], [InputWarning(str(path), "skills directory does not exist")]
if not path.is_dir():
return [], [InputWarning(str(path), "skills path is not a directory")]
items: list[dict[str, Any]] = []
for skill_path in sorted(path.rglob("SKILL.md")):
if skill_path.is_symlink():
warnings.append(InputWarning(str(skill_path), "skipped symlinked skill file"))
continue
try:
text = skill_path.read_text(encoding="utf-8")
except Exception as exc:
warnings.append(InputWarning(str(skill_path), f"could not read skill: {exc}"))
continue
frontmatter = parse_skill_frontmatter(text)
name = str(frontmatter.get("name") or skill_path.parent.name).strip() or skill_path.parent.name
items.append(
{
"id": stable_id("skill", source_name, skill_path, sha256_text(text)),
"kind": "skill",
"name": name,
"category": str(frontmatter.get("category") or "general"),
"source": source_name,
"format": "SKILL.md",
"content": text,
"metadata": {
"source_path": str(skill_path),
"sha256": sha256_text(text),
"frontmatter": frontmatter,
},
}
)
return items, warnings
def looks_textual(path: Path) -> bool:
if path.suffix.lower() in TEXT_EXTENSIONS:
return True
guessed, _ = mimetypes.guess_type(str(path))
return bool(guessed and (guessed.startswith("text/") or guessed in {"application/json"}))
def iter_archive_dir(path: Path) -> Iterable[Path | InputWarning]:
try:
children = sorted(path.iterdir())
except Exception as exc:
yield InputWarning(str(path), f"could not scan archive directory: {exc}")
return
for child in children:
if child.is_symlink():
yield InputWarning(str(child), "skipped symlinked archive path")
continue
if child.is_file():
yield child
elif child.is_dir():
yield from iter_archive_dir(child)
def iter_archive_files(paths: Iterable[Path]) -> Iterable[Path | InputWarning]:
for path in paths:
if path.is_symlink():
yield InputWarning(str(path), "skipped symlinked archive path")
continue
if path.is_file():
yield path
elif path.is_dir():
yield from iter_archive_dir(path)
def collect_archive_paths(
paths: list[Path],
source_name: str,
*,
include_content: bool = False,
max_bytes: int = 256_000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
warnings: list[InputWarning] = []
items: list[dict[str, Any]] = []
existing_paths: list[Path] = []
for path in paths:
if path.is_symlink():
warnings.append(InputWarning(str(path), "archive path is a symlink; skipped"))
continue
if not path.exists():
warnings.append(InputWarning(str(path), "archive path does not exist"))
continue
if not path.is_file() and not path.is_dir():
warnings.append(InputWarning(str(path), "archive path is not a file or directory"))
continue
existing_paths.append(path)
for entry in iter_archive_files(existing_paths):
if isinstance(entry, InputWarning):
warnings.append(entry)
continue
path = entry
if not looks_textual(path):
warnings.append(InputWarning(str(path), "skipped non-text archive file"))
continue
try:
st = path.stat()
except Exception as exc:
warnings.append(InputWarning(str(path), f"could not stat archive file: {exc}"))
continue
size = st.st_size
try:
file_hash = sha256_path(path)
except Exception as exc:
warnings.append(InputWarning(str(path), f"could not hash archive file: {exc}"))
continue
if include_content and size > max_bytes:
warnings.append(InputWarning(str(path), f"skipped archive content over {max_bytes} bytes"))
archive_item: dict[str, Any] = {
"id": stable_id("archive", source_name, path, file_hash),
"kind": "archive_document",
"title": path.name,
"source": source_name,
"metadata": {
"source_path": str(path),
"size_bytes": size,
"sha256": file_hash,
},
}
if include_content and size <= max_bytes:
try:
archive_item["content"] = path.read_text(encoding="utf-8")
except UnicodeDecodeError:
archive_item["content"] = path.read_text(encoding="utf-8", errors="replace")
archive_item["metadata"]["decoded_with_replacement"] = True
items.append(archive_item)
return items, warnings
def build_manifest(args) -> dict[str, Any]:
warnings: list[InputWarning] = []
items: list[dict[str, Any]] = []
for path in args.memory_json:
collected, got_warnings = collect_memory_json(path, args.source_name)
items.extend(collected)
warnings.extend(got_warnings)
for path in args.skills_dir:
collected, got_warnings = collect_skill_dir(path, args.source_name)
items.extend(collected)
warnings.extend(got_warnings)
for path in args.conversation_json:
collected, got_warnings = collect_conversation_json(
path,
args.source_name,
include_content=args.include_conversation_content,
max_messages=args.max_conversation_messages,
)
items.extend(collected)
warnings.extend(got_warnings)
if args.archive:
collected, got_warnings = collect_archive_paths(
args.archive,
args.source_name,
include_content=args.include_archive_content,
max_bytes=args.max_archive_bytes,
)
items.extend(collected)
warnings.extend(got_warnings)
counts: dict[str, int] = {}
for item in items:
counts[item["kind"]] = counts.get(item["kind"], 0) + 1
return {
"schema_version": SCHEMA_VERSION,
"generated_at": utc_now_iso(),
"source": {
"name": args.source_name,
"kind": args.source_kind,
},
"summary": {
"item_count": len(items),
"counts_by_kind": counts,
"warning_count": len(warnings),
},
"items": items,
"warnings": [{"path": warning.path, "message": warning.message} for warning in warnings],
}
def parse_args(argv: list[str] | None = None):
parser = argparse.ArgumentParser(description="Build a neutral Odysseus agent migration manifest.")
parser.add_argument("--source-name", default="agent-export", help="Human-readable source name.")
parser.add_argument("--source-kind", default="generic", help="Source adapter kind, e.g. generic, openclaw, hermes.")
parser.add_argument(
"--memory-json",
action="append",
type=Path,
default=[],
help="JSON memory export. May be a list, or an object containing memories/items/data.",
)
parser.add_argument(
"--skills-dir",
action="append",
type=Path,
default=[],
help="Directory containing SKILL.md files. Scanned recursively.",
)
parser.add_argument(
"--archive",
action="append",
type=Path,
default=[],
help="Text/Markdown/JSON file or directory to preserve as archive documents.",
)
parser.add_argument(
"--conversation-json",
action="append",
type=Path,
default=[],
help="Conversation export JSON. Supports generic message lists and ChatGPT-style conversations.json.",
)
parser.add_argument(
"--include-archive-content",
action="store_true",
help="Embed archive document content in the manifest. By default only metadata is included.",
)
parser.add_argument(
"--max-archive-bytes",
type=int,
default=256_000,
help="Maximum bytes to embed per archive file when --include-archive-content is used.",
)
parser.add_argument(
"--include-conversation-content",
action="store_true",
help="Embed normalized conversation messages. By default only thread metadata is included.",
)
parser.add_argument(
"--max-conversation-messages",
type=int,
default=2000,
help="Maximum messages to embed per conversation when --include-conversation-content is used.",
)
parser.add_argument("--output", type=Path, help="Write manifest JSON to this path instead of stdout.")
parser.add_argument("--compact", action="store_true", help="Write compact JSON without indentation.")
return parser.parse_args(argv)
def main(argv: list[str] | None = None) -> int:
args = parse_args(argv)
manifest = build_manifest(args)
text = json.dumps(manifest, ensure_ascii=False, sort_keys=True, separators=(",", ":")) if args.compact else (
json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
)
if args.output:
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text(text, encoding="utf-8")
else:
sys.stdout.write(text)
return 0
if __name__ == "__main__":
raise SystemExit(main())
+90
View File
@@ -611,6 +611,93 @@ def _cache_key(host: str, ssh_port: str, platform_name: str):
)
def _is_containerized():
"""Best-effort check for whether the local Odysseus process is running in a container."""
if _remote_host:
return False
if os.path.exists("/.dockerenv"):
return True
try:
with open("/proc/1/cgroup", encoding="utf-8", errors="replace") as f:
text = f.read().lower()
return any(marker in text for marker in ("docker", "containerd", "kubepods"))
except Exception:
return False
def _hardware_visibility_warning(result):
"""Return a non-blocking UX warning when detected hardware may only be container-visible."""
if not isinstance(result, dict):
return None
if result.get("manual_hardware"):
return None
if not result.get("containerized"):
return None
if result.get("gpu_error"):
return None
if not result.get("has_gpu"):
return {
"code": "container_no_gpu_visible",
"severity": "warning",
"title": "No GPU visible inside Docker",
"message": (
"Cookbook is scanning hardware from inside the Odysseus container. "
"If your host has a GPU, Docker may not be exposing it to the container, "
"so model recommendations may be CPU-only or too conservative."
),
"actions": [
"manual_hardware",
"rescan",
"copy_diagnostics",
],
}
total_ram = result.get("total_ram_gb") or 0
if total_ram and total_ram <= 8:
return {
"code": "container_low_ram_visible",
"severity": "info",
"title": "Container-visible RAM may be lower than host RAM",
"message": (
"Cookbook is seeing the RAM available inside the container. "
"If your host has more memory, validate host RAM separately or use Manual Hardware."
),
"actions": [
"manual_hardware",
"rescan",
"copy_diagnostics",
],
}
return None
def _attach_probe_context(result, host=""):
"""Attach probe-scope metadata and optional hardware visibility warning."""
if not isinstance(result, dict) or result.get("error"):
return result
is_remote = bool(host)
containerized = False if is_remote else _is_containerized()
result["probe_scope"] = "remote" if is_remote else ("container" if containerized else "native")
result["containerized"] = containerized
warning = _hardware_visibility_warning(result)
if warning:
result["hardware_visibility_warning"] = warning
else:
result.pop("hardware_visibility_warning", None)
return result
def detect_system(host="", ssh_port="", platform="", fresh=False):
"""Detect system hardware: RAM, CPU, GPU. Cached per host (hardware rarely
changes, and probing a remote host over SSH is slow). Pass fresh=True to
@@ -635,6 +722,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
if _remote_platform == "windows" and _remote_host:
result = _detect_windows()
if result:
result = _attach_probe_context(result, host=host)
_remote_host = None
_remote_platform = None
_cache_by_host[cache_key] = (now, result)
@@ -653,6 +741,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
if not _remote_host and os.name == "nt":
result = _detect_windows()
if result:
result = _attach_probe_context(result, host=host)
_cache_by_host[cache_key] = (now, result)
return result
# PowerShell probe failed entirely — fall through to the generic path
@@ -714,6 +803,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
"gpu_error": _last_gpu_error,
}
result = _attach_probe_context(result, host=host)
_remote_host = None
_remote_platform = None
_cache_by_host[cache_key] = (now, result)
+8 -2
View File
@@ -188,12 +188,18 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
# Shrink context if even the chosen KV won't fit alongside weights.
# Start from the smaller of the profile's target and the model's limit.
cur_ctx = min(ctx, model_ctx_max)
while cur_ctx >= 8192:
# Floor the context-shrink loop at 8192, but never above the model's own
# trained limit. A model with a sub-8192 context (e.g. a 2048-token
# SmolLM) starts below 8192, so a hard-coded 8192 guard skipped the loop
# entirely and produced NO profile — the serve UI then fell back to
# manual flags even though the model fits the GPU trivially.
ctx_floor = min(8192, model_ctx_max)
while cur_ctx >= ctx_floor:
kv = _kv_gb(model, cur_ctx, kv_type)
n_cpu_moe, fits = _cpu_moe_for_budget(model, quant, kv, budget, fixed_gb=serve_weights_gb)
est = _weights_gb(model, quant, serve_weights_gb) + kv + 0.6
# If a non-MoE model can't fit even fully offloaded, try less context.
if model.get("is_moe") or fits or cur_ctx <= 8192:
if model.get("is_moe") or fits or cur_ctx <= ctx_floor:
profiles.append({
"key": key,
"label": label,
+42 -26
View File
@@ -66,41 +66,57 @@ def _has_duplicate_title(skills, title: str) -> bool:
def _extract_json_object(text: str) -> Optional[dict]:
"""Best-effort extraction of a JSON object from an LLM response.
The response may be wrapped in code fences or surrounded by prose, and some
models emit a stray brace in the prose before the real object
(e.g. "uses {placeholder} then {...}"). Slicing first-'{' .. last-'}' then
grabs an unparseable span and the skill is silently lost. Try the whole
string first, then each '{' start position in turn, returning the first
candidate that parses to a JSON object (dict). Returns None if none do.
The response may be wrapped in code fences or surrounded by prose. Uses
json.JSONDecoder().raw_decode() to locate the boundaries of complete JSON
objects starting at each '{' position. Nested objects are filtered out to
keep only top-level candidates. If multiple non-overlapping valid JSON
objects are found, it is treated as ambiguous and returns None. Otherwise,
returns the single valid candidate dictionary.
"""
if not text:
return None
s = text.strip()
if s.startswith("```"):
s = s.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
end = s.rfind("}")
if end == -1:
decoder = json.JSONDecoder()
candidates = []
start = s.find("{")
while start != -1:
try:
obj, idx = decoder.raw_decode(s[start:])
end_pos = start + idx
if isinstance(obj, dict):
candidates.append((start, end_pos, obj))
except (json.JSONDecodeError, ValueError):
pass
start = s.find("{", start + 1)
# Filter out nested candidates to identify top-level dictionaries
top_level = []
for c in candidates:
is_nested = False
for other in candidates:
if other == c:
continue
if other[0] <= c[0] and c[1] <= other[1]:
is_nested = True
break
if not is_nested:
top_level.append(c)
if not top_level:
return None
def _as_dict(candidate):
try:
obj = json.loads(candidate)
except (json.JSONDecodeError, ValueError):
return None
return obj if isinstance(obj, dict) else None
if len(top_level) > 1:
logger.debug(
"[skill-extract] Found multiple non-overlapping JSON objects: %s",
[item[2].get("title") for item in top_level]
)
return None
# The clean, common case: the whole (de-fenced) string is the object.
obj = _as_dict(s)
if obj is not None:
return obj
# Otherwise scan each '{' candidate up to the last '}'.
start = s.find("{")
while 0 <= start < end:
obj = _as_dict(s[start : end + 1])
if obj is not None:
return obj
start = s.find("{", start + 1)
return None
return top_level[0][2]
async def maybe_extract_skill(
+6 -4
View File
@@ -603,7 +603,6 @@ class SkillsManager:
escalation) those are work-in-progress and pollute the
prompt with half-finished procedures.
"""
active_toolsets = active_toolsets or []
out = []
for s in self.load(owner=owner):
status = s.get("status")
@@ -617,13 +616,16 @@ class SkillsManager:
# Platform gating
if platform and s.get("platforms") and platform not in s["platforms"]:
continue
# requires_toolsets: hide unless every required toolset is active
# requires_toolsets: hide unless every required toolset is active.
# active_toolsets=None means the caller doesn't know the active
# set (API listings, chat preface) — don't gate in that case;
# only an explicit list filters.
req = s.get("requires_toolsets") or []
if req and not all(t in active_toolsets for t in req):
if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
continue
# fallback_for_toolsets: hide when any of those toolsets is active
fb = s.get("fallback_for_toolsets") or []
if fb and any(t in active_toolsets for t in fb):
if fb and active_toolsets and any(t in active_toolsets for t in fb):
continue
out.append({
"name": s["name"],
+21 -4
View File
@@ -285,6 +285,7 @@ class ResearchHandler:
query, report, stats, elapsed,
findings=researcher.findings,
evolving_report=researcher.evolving_report,
analyzed_urls=getattr(researcher, "analyzed_urls", None),
)
except Exception as e:
@@ -331,7 +332,8 @@ class ResearchHandler:
def _format_research_report(
self, query: str, full_report: str, stats: dict, elapsed: float,
findings: list = None, evolving_report: str = None,
findings: Optional[list] = None, evolving_report: Optional[str] = None,
analyzed_urls: Optional[list] = None,
) -> str:
"""Format research report with sources list and expandable raw findings."""
summary_lines = [
@@ -342,20 +344,34 @@ class ResearchHandler:
]
summary_text = " | ".join(summary_lines)
# Build sources list with clickable links
# Build sources list with clickable links. Keep the curated Sources
# section filtered for citation quality, but also list every unique URL
# the research run inspected so the "URLs Analyzed" count is auditable.
sources_section = ""
if findings:
analyzed_urls_section = ""
url_items = analyzed_urls if analyzed_urls is not None else findings
if findings or url_items:
seen_urls = set()
source_lines = []
for f in findings:
analyzed_seen = set()
analyzed_lines = []
for f in findings or []:
url = f.get("url", "")
title = f.get("title", "") or url
summary = f.get("summary", "") or f.get("evidence", "")
if url and url not in seen_urls and not is_low_quality(summary):
seen_urls.add(url)
source_lines.append(f"- [{title}]({url})")
for item in url_items or []:
url = item.get("url", "")
title = item.get("title", "") or url
if url and url not in analyzed_seen:
analyzed_seen.add(url)
analyzed_lines.append(f"{len(analyzed_lines) + 1}. [{title}]({url})")
if source_lines:
sources_section = "\n### Sources\n\n" + "\n".join(source_lines) + "\n"
if analyzed_lines:
analyzed_urls_section = "\n### Analyzed URLs\n\n" + "\n".join(analyzed_lines) + "\n"
# Build raw findings section (individual extractions per source)
raw_findings_section = ""
@@ -391,6 +407,7 @@ class ResearchHandler:
{full_report}
{sources_section}
{analyzed_urls_section}
{collected_section}
---
+34
View File
@@ -299,6 +299,40 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0) ->
_cache_result(cache_file, cache_key, result, url)
return result
# Plain-text / Markdown / JSON handling. Sources like
# raw.githubusercontent.com serve Markdown as `text/plain`, JSON APIs and
# raw config files serve `application/json`, and a lot of code and tool
# docs live in `.md` / `.txt`. These have no HTML structure, so the HTML
# branch below would extract nothing and report "no readable text content".
# Return the body verbatim instead. The `is_html` guard keeps real HTML
# (including `application/xhtml+xml`) on the parsing path; the `json` check
# covers `application/json` and `+json` suffixes; the URL-suffix fallback
# catches servers that mislabel text files as `application/octet-stream`.
is_html = "html" in content_type
is_json = "json" in content_type
url_path = url.lower().split("?", 1)[0].split("#", 1)[0]
looks_like_text_file = url_path.endswith(
(".md", ".markdown", ".txt", ".text", ".json", ".jsonl")
)
if not is_html and (content_type.startswith("text/") or is_json or looks_like_text_file):
text_body = (response.text or "").strip()
result = {
"url": url,
"title": os.path.basename(url_path) or url,
"content": text_body,
"lists": [],
"tables": [],
"code_blocks": [],
"meta_description": "",
"meta_keywords": "",
"js_rendered": False,
"js_message": "",
"success": bool(text_body),
"error": "" if text_body else "Empty response body",
}
_cache_result(cache_file, cache_key, result, url)
return result
# HTML handling
try:
soup = BeautifulSoup(response.text, "html.parser")
+13 -8
View File
@@ -134,9 +134,10 @@ _NEWS_HINTS = ("news", "nyheter", "headlines", "breaking", "latest", "today", "i
_GENERAL_ENGINES = os.environ.get("SEARXNG_GENERAL_ENGINES", "bing,mojeek,presearch")
def searxng_search_api(query: str, count: int = 10, categories: str = "general",
def searxng_search_api(query: str, count: Optional[int] = None, categories: str = "general",
time_filter: Optional[str] = None) -> List[dict]:
"""Search using SearXNG JSON API. Returns list of {title, url, snippet}."""
count = count if count is not None else _get_result_count()
instance = _get_search_instance()
api_key = ""
headers = {"User-Agent": "Mozilla/5.0"}
@@ -282,8 +283,9 @@ def searxng_search(query, max_results=10):
# ── Brave ──
def brave_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
def brave_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
"""Search using Brave API with key from admin settings or env var."""
count = count if count is not None else _get_result_count()
api_key = _get_provider_key("brave") or os.environ.get("DATA_BRAVE_API_KEY") or ""
return _brave_search_impl(query, count, time_filter, search_config={"brave_api_key": api_key})
@@ -381,9 +383,9 @@ def _resolve_ddg_redirect(raw: str) -> str:
return resolved
def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
def duckduckgo_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
"""Search using DuckDuckGo via the duckduckgo-search library. No API key needed."""
count = count if count is not None else _get_result_count()
def _html_fallback() -> List[dict]:
try:
response = httpx.get(
@@ -415,7 +417,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] =
return []
try:
from duckduckgo_search import DDGS
from ddgs import DDGS
except ImportError:
logger.warning("duckduckgo-search package not installed; using HTML fallback")
return _html_fallback()
@@ -452,7 +454,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] =
# ── Google Programmable Search Engine ──
def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
def google_pse_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
"""Search using Google PSE (Custom Search JSON API).
Requires two keys in settings:
@@ -460,6 +462,7 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
- google_pse_cx: Programmable Search Engine ID (cx)
Or env vars GOOGLE_API_KEY and GOOGLE_PSE_CX.
"""
count = count if count is not None else _get_result_count()
settings = _get_search_settings()
api_key = _get_provider_key("google_pse") or os.environ.get("GOOGLE_API_KEY", "")
cx = (settings.get("google_pse_cx") or "").strip() or os.environ.get("GOOGLE_PSE_CX", "")
@@ -522,8 +525,9 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
# ── Tavily ──
def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
def tavily_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
"""Search using Tavily API. Requires search_api_key or TAVILY_API_KEY env var."""
count = count if count is not None else _get_result_count()
api_key = _get_provider_key("tavily") or os.environ.get("TAVILY_API_KEY", "")
if not api_key:
logger.warning("Tavily: no API key configured")
@@ -580,8 +584,9 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
# ── Serper.dev ──
def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
def serper_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
"""Search using Serper.dev API. Requires search_api_key or SERPER_API_KEY env var."""
count = count if count is not None else _get_result_count()
api_key = _get_provider_key("serper") or os.environ.get("SERPER_API_KEY", "")
if not api_key:
logger.warning("Serper: no API key configured")
+46 -15
View File
@@ -64,20 +64,40 @@ def is_youtube_url(url: str) -> bool:
return "youtube.com" in url or "youtu.be" in url
# youtube.com-shaped hosts. music.youtube.com serves the same /watch and
# /shorts paths, so links shared from YouTube Music must resolve too.
_YT_HOSTS = ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com")
# Path prefixes whose first following segment is the video id. Covers the
# /embed/ player, Shorts (/shorts/), live streams (/live/), and the legacy
# /v/ embed — all of which `is_youtube_url` already treats as YouTube, so
# they must be extractable or the link is silently dropped (neither web-fetched
# nor transcript-fetched) by the chat pipeline.
_YT_PATH_PREFIXES = ("/embed/", "/shorts/", "/live/", "/v/")
def extract_youtube_id(url: str) -> Optional[str]:
"""Extract YouTube video ID from various URL formats."""
"""Extract a YouTube video ID from the common URL shapes:
watch?v=, youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>,
across youtube.com / m.youtube.com / music.youtube.com / youtu.be."""
if not isinstance(url, str):
return None
parsed = urllib.parse.urlparse(url)
if parsed.hostname in ("www.youtube.com", "youtube.com", "m.youtube.com"):
host = (parsed.hostname or "").lower()
if host in _YT_HOSTS:
if parsed.path == "/watch":
params = urllib.parse.parse_qs(parsed.query)
if "v" in params:
if params.get("v"):
return params["v"][0]
elif parsed.path.startswith("/embed/"):
return parsed.path.split("/")[-1]
elif parsed.hostname == "youtu.be":
return parsed.path[1:]
else:
for prefix in _YT_PATH_PREFIXES:
if parsed.path.startswith(prefix):
vid = parsed.path[len(prefix):].split("/")[0]
if vid:
return vid
elif host == "youtu.be":
vid = parsed.path.lstrip("/").split("/")[0]
if vid:
return vid
return None
@@ -170,6 +190,8 @@ def format_transcript_for_context(
if segments:
ctx += "Timestamped Transcript:\n"
for seg in segments:
if not isinstance(seg, dict):
continue
ctx += f"[{seg['timestamp']}] {seg['text']}\n"
# Check length — fall back to plain text if too long
if len(ctx) > 12000:
@@ -202,15 +224,24 @@ async def fetch_youtube_comments(
f"https://www.youtube.com/watch?v={video_id}",
]
proc = await asyncio.wait_for(
asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
),
timeout=timeout,
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
# Bound the wait on the process actually finishing, not on spawning it.
# create_subprocess_exec returns as soon as the child starts, so wrapping
# it in wait_for never enforces the timeout — proc.communicate() is the
# blocking step. Kill and reap the child if it overruns so it does not
# linger after we return.
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
except asyncio.TimeoutError:
proc.kill()
await proc.wait()
raise
if proc.returncode != 0:
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
+3
View File
@@ -91,6 +91,9 @@ _ROUTING_PATTERNS: tuple[tuple[str, str, Pattern[str]], ...] = tuple(
("ui", "tool or feature toggle request", r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b"),
# Deep research jobs, not quick conceptual mentions of research.
("web", "explicit web search request", rf"{_PLEASE}(?:do|run|use|perform|make)\s+(?:a\s+)?(?:web\s+search|search\s+the\s+web)\b.+"),
("web", "web lookup imperative request", rf"{_PLEASE}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
("web", "assistant web lookup request", rf"{_ACTION_QUESTION}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
("research", "deep research imperative request", rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+"),
("research", "assistant deep research request", rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+"),
+179 -53
View File
@@ -21,7 +21,7 @@ from src.settings import get_setting
from src.prompt_security import untrusted_context_message
from src.tool_security import blocked_tools_for_owner, plan_mode_disabled_tools
from src.tool_policy import GUIDE_ONLY_DIRECTIVE, ToolPolicy
from src.tool_utils import get_mcp_manager
from src.tool_utils import _truncate, get_mcp_manager
from src.agent_tools import (
parse_tool_blocks,
strip_tool_blocks,
@@ -262,6 +262,11 @@ _DOMAIN_RULES = {
- Use `manage_settings` for preferences and tool enable/disable.
- Use named tools over `app_api` when a named wrapper exists.
- `app_api` is only for safe UI/API actions without a named tool; do not use it for shell, package installs, engine rebuilds, or sensitive auth/admin paths.""",
"contacts": """\
## Contacts rules
- Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
- Use `manage_contact` to list, add, update, or delete contacts in the address book.
- Do NOT use `manage_memory` for contact lookups contact details live in the address book, not memory.""",
}
_DOMAIN_TOOL_MAP = {
@@ -272,8 +277,9 @@ _DOMAIN_TOOL_MAP = {
"notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
"ui": {"ui_control"},
"sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls"},
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
"settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
"contacts": {"resolve_contact", "manage_contact"},
}
def _domain_rules_for_tools(tool_names: set) -> list[str]:
@@ -309,6 +315,7 @@ NEVER pipe multi-line Python through `python -c "..."` — shell quoting eats re
<python code>
```
Execute Python code. Use for computation, data processing, scripting. NOT for writing code for the user (use create_document for that). Same sandbox limits as bash no TTY, no GUI, no `input()`; for anything the user should interact with, generate a single HTML file with inline JS instead.
Prefer a dedicated tool whenever one fits the job (reading, searching, or writing files); use python only for computation/processing no dedicated tool covers - not for reading or writing files.
Do NOT use Python/requests for web lookup/search/latest/current requests when `web_search` or `web_fetch` is available.""",
"web_search": """\
@@ -347,6 +354,11 @@ Write content to a file. First line is the path, rest is the content.""",
```
Edit an EXISTING file by exact string replacement. PREFER this over bash (sed/echo/redirects) for changing files it shows a before/after diff. `old_string` must match the file exactly and be unique unless `replace_all` is true. Use write_file to create a new file.""",
"get_workspace": """\
```get_workspace
```
Return the absolute path of the active workspace folder. File tools are CONFINED to it (paths can be RELATIVE to it); the shell starts there (cwd) but is NOT sandboxed. Call this first when the user says "the project"/"the code"/"this folder" without a path, instead of asking them. No arguments.""",
"create_document": """\
```create_document
<title>
@@ -598,7 +610,7 @@ _API_HOSTS = frozenset([
"api.deepseek.com", "deepseek.com",
"api.together.xyz", "api.fireworks.ai",
"api.perplexity.ai", "api.x.ai",
"ollama.com", "api.venice.ai",
"ollama.com", "api.venice.ai", "api.kimi.com",
"api.githubcopilot.com",
# Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
# Without these, `_is_api_model` falls back to keyword sniffing on the
@@ -785,6 +797,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
domains.add("documents")
if has(r"\b(search|web|google|look up|latest|news|current|weather|forecast|stock price|price of|website|url|https?://|www\.)\b"):
domains.add("web")
if has(
r"\b(wyszukaj|wyszukać|wyszukac)\b.*\b(internet|internecie|online|web)\b",
r"\b(sprawd[zź]|znajd[zź])\b.*\b(internet|internecie|online|web)\b",
r"\b(aktualn\w*|bieżąc\w*|biezac\w*|dzisiaj|teraz)\b.*\b(pogod\w*|temperatur\w*)\b",
):
domains.add("web")
if has(r"\b(research|deep dive|investigate|look into)\b"):
domains.add("web")
if has(r"\b(open|show|toggle|turn on|turn off|disable|enable|switch model|change model|settings|theme|panel)\b"):
@@ -795,6 +813,8 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
domains.add("files")
if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
domains.add("settings")
if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
domains.add("contacts")
low_signal = not continuation and not domains
return {
@@ -860,7 +880,7 @@ def _build_system_prompt(
_ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
except Exception:
_ov_sig = ""
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, suppress_local_context)
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
agent_prompt = _cached_base_prompt
# Skill index is user-editable (name + description), so it must never
@@ -868,7 +888,7 @@ def _build_system_prompt(
# when the cache hits.
_, _skill_index_block = _build_base_prompt(
disabled_tools, mcp_mgr, needs_admin, relevant_tools,
mcp_disabled_map=mcp_disabled_map, compact=compact,
mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
suppress_local_context=suppress_local_context,
)
else:
@@ -879,6 +899,7 @@ def _build_system_prompt(
relevant_tools,
mcp_disabled_map=mcp_disabled_map,
compact=compact,
owner=owner,
suppress_local_context=suppress_local_context,
)
if not active_document:
@@ -894,9 +915,20 @@ def _build_system_prompt(
# Current date/time for every agent request. This is user-local when the
# browser provided timezone headers, with a server-local fallback.
#
# IMPORTANT: this is intentionally NOT prepended into agent_prompt (the
# system message) anymore. Its text changes every minute, and local
# OpenAI-compatible backends (llama.cpp / LM Studio) key their KV-cache
# prefix off the system message byte-for-byte — mixing ever-changing
# timestamp text into the (already large, tool-laden) agent system prompt
# would invalidate the cached prefix on every single request, forcing a
# full prompt re-evaluation each turn (issue #2927). It's built here as a
# standalone *user*-role message and inserted near the end of the array,
# right alongside _doc_message / _skills_message, below.
_datetime_message = None
try:
from src.user_time import current_datetime_prompt
agent_prompt = current_datetime_prompt() + agent_prompt
from src.user_time import current_datetime_context_message
_datetime_message = current_datetime_context_message()
except Exception:
pass
@@ -1296,6 +1328,9 @@ def _build_system_prompt(
last_user_idx += 1
if _skills_message:
merged.insert(last_user_idx, _skills_message)
last_user_idx += 1
if _datetime_message:
merged.insert(last_user_idx, _datetime_message)
return merged, mcp_schemas
@@ -1314,6 +1349,7 @@ def _build_base_prompt(
relevant_tools=None,
mcp_disabled_map=None,
compact: bool = False,
owner: Optional[str] = None,
suppress_local_context: bool = False,
):
"""Build the agent prompt with only relevant tools included.
@@ -1373,7 +1409,7 @@ def _build_base_prompt(
from src.constants import DATA_DIR
_sm = SkillsManager(DATA_DIR)
active_tools = list(set(TOOL_SECTIONS.keys()) - set(disabled or []))
skill_idx = _sm.index_for(owner=None, active_toolsets=active_tools)
skill_idx = _sm.index_for(owner=owner, active_toolsets=active_tools)
if skill_idx:
lines = ["## Available skills",
"Procedures the assistant should consult before doing domain work. "
@@ -1782,10 +1818,10 @@ async def stream_agent_loop(
owner: Optional[str] = None,
relevant_tools: Optional[Set[str]] = None,
fallbacks: Optional[List[tuple]] = None,
workspace: Optional[str] = None,
plan_mode: bool = False,
approved_plan: Optional[str] = None,
tool_policy: Optional[ToolPolicy] = None,
workspace: Optional[str] = None,
_is_teacher_run: bool = False,
) -> AsyncGenerator[str, None]:
"""Streaming agent loop generator.
@@ -1854,8 +1890,21 @@ async def stream_agent_loop(
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
from src.tool_index import ALWAYS_AVAILABLE
_relevant_tools = set(ALWAYS_AVAILABLE)
logger.info("[tool-rag] Low-signal agent message; skipping retrieval and using always-available tools only")
if workspace:
# An active workspace IS the file-work signal: a vague "look at the
# project" means explore this folder. Surface only the READ-ONLY file
# tools (intersection with the plan-mode read-only allowlist) so the
# agent can investigate; write/shell tools stay out until the request
# actually calls for them (RAG retrieval adds those on a real ask).
_relevant_tools = set(ALWAYS_AVAILABLE)
from src.tool_security import PLAN_MODE_READONLY_TOOLS
_relevant_tools |= (_DOMAIN_TOOL_MAP["files"] & PLAN_MODE_READONLY_TOOLS)
logger.info("[tool-rag] Low-signal but workspace active; including read-only file tools")
else:
# Don't short-circuit: fall through to RAG retrieval below.
# Non-English queries are flagged low_signal by the English-only
# intent classifier, but fastembed retrieval works across languages.
logger.info("[tool-rag] Low-signal query; will run RAG retrieval")
if not guide_only and not _relevant_tools:
try:
from src.tool_index import get_tool_index, ALWAYS_AVAILABLE
@@ -1930,6 +1979,44 @@ async def stream_agent_loop(
if _relevant_tools is not None and active_document is not None:
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
# The skill index injected by _build_system_prompt tells the model to
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
# into the prompt as procedures to follow — but neither path goes through
# tool selection, so the model can be handed a procedure naming tools
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
# in lockstep: manage_skills is callable whenever any skill is indexed,
# and a matched skill's declared requires_toolsets ride along with it.
if not guide_only and _relevant_tools is not None:
try:
from services.memory.skills import SkillsManager
from src.constants import DATA_DIR
_skills_on = True
try:
from routes.prefs_routes import _load_for_user as _load_prefs
_skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
except Exception:
pass
_sm = SkillsManager(DATA_DIR)
_owner_skills = _sm.load(owner=owner) if _skills_on else []
if _owner_skills:
_relevant_tools.add("manage_skills")
if _retrieval_query:
# Validate against every known executable tool, not just
# TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
# schemas without a prompt-prose section.
from src.tool_policy import known_tool_names
_known = known_tool_names()
for _sk in _sm.get_relevant_skills(
_retrieval_query, skills=_owner_skills,
threshold=0.25, max_items=3,
):
_relevant_tools.update(
t for t in (_sk.get("requires_toolsets") or [])
if t in _known
)
except Exception as _e:
logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
if _relevant_tools is not None:
logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
@@ -1980,6 +2067,10 @@ async def stream_agent_loop(
# and can override this list for users who know their setup.
_model_no_tools = any(kw in _model_lc for kw in (
"deepseek-r1",
# Open-weight GPT-OSS models are commonly served through llama.cpp /
# llama-cpp-python. Their names contain "gpt-o", but they do not use
# OpenAI's native tool-call channel unless the endpoint opts in.
"gpt-oss",
))
# Native Ollama endpoints (/api/chat) handle tool schemas differently from
# the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to
@@ -2011,27 +2102,6 @@ async def stream_agent_loop(
suppress_local_context=guide_only,
active_email=active_email,
)
if workspace and not guide_only:
# PREPEND (not append) so it dominates the large base prompt — appended
# at the end, small models ignored it and asked the user for code. The
# folder IS the project; the agent must explore it, not ask.
_ws_note = (
f"## ACTIVE WORKSPACE — READ FIRST\n"
f"The user is working in this folder: {workspace}\n"
f"It IS the project. bash/python run with cwd set here and "
f"read_file/write_file are confined to it (paths outside are rejected).\n"
f"When the user says \"the code\" / \"this project\" / \"the workspace\" "
f"or asks to review/find/edit something WITHOUT a path, they mean THIS "
f"folder. Do NOT ask the user for code or a path, and do NOT read a file "
f"literally named \"workspace\". ALWAYS start by exploring it yourself: "
f"run `bash` → `git ls-files` (or `ls -R`) to see the files, then "
f"read_file the relevant ones by path RELATIVE to the workspace."
)
if messages and messages[0].get("role") == "system":
messages[0]["content"] = _ws_note + "\n\n" + (messages[0].get("content") or "")
else:
messages.insert(0, {"role": "system", "content": _ws_note})
logger.info("[workspace] active for this turn: %s", workspace)
if plan_mode and not guide_only:
# Steer the model to investigate-then-propose. Hard tool gating handles
# every write path except shell; this directive is what keeps the
@@ -2063,30 +2133,34 @@ async def stream_agent_loop(
_t3 = time.time()
try:
from src.context_compactor import trim_for_context
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX
from src.settings import is_setting_overridden
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX, DEFAULT_BUDGET, budget_is_explicit as _budget_is_explicit
from src.model_context import budget_context_for_model
soft_budget = int(get_setting("agent_input_token_budget", 6000) or 0)
soft_budget = int(get_setting("agent_input_token_budget", DEFAULT_BUDGET) or 0)
if soft_budget > 0:
before_trim_tokens = estimate_tokens(messages)
reserve_tokens = min(max(max_tokens or 1024, 512), 2048)
# Honour the configurable ceiling for the auto-derived budget path.
# No-op when the user has an explicit `agent_input_token_budget`
# (that branch ignores hard_max). Falls back to DEFAULT_HARD_MAX
# on missing/malformed values so misconfig can't zero the budget.
# Ceiling for the auto-derived budget (no effect on an explicit budget;
# see #1230). Falls back to DEFAULT_HARD_MAX on missing/malformed values
# so misconfig can't zero the budget.
try:
hard_max = int(get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX) or DEFAULT_HARD_MAX)
except (TypeError, ValueError):
hard_max = DEFAULT_HARD_MAX
if hard_max <= 0:
hard_max = DEFAULT_HARD_MAX
# Scale the default budget to the model's context window so long-context
# models aren't silently capped at 6000; an explicit user setting is
# still honoured (clamped to the window). (#1170)
# Default value = auto sentinel (scale to the window); any other value =
# explicit cap. Value-based, not presence-based, because the save path
# materializes defaults so a persisted default must still read as auto (#4121).
budget_is_explicit = _budget_is_explicit(soft_budget)
# Scale only off a window we actually discovered, bound to the value it
# proves (else 0) — not the passed-in context_length, which can be stale
# or unset for some callers (#4122 review).
ctx_for_budget = budget_context_for_model(endpoint_url, model, fallback=context_length)
effective_budget = compute_input_token_budget(
soft_budget,
context_length,
is_setting_overridden("agent_input_token_budget"),
ctx_for_budget,
budget_is_explicit,
hard_max=hard_max,
)
trimmed_messages = trim_for_context(
@@ -2161,11 +2235,12 @@ async def stream_agent_loop(
# tool, so we don't nudge on harmless transitional text like "let me
# know what you think".
_INTENT_RE = re.compile(
r"(?:^|\n)\s*(?:let me|i'?ll|i will|going to|let's)\s+"
r"(?:^|\n)\s*(?:let me|i'?ll|i will|i need to|we need to|need to|"
r"i should|we should|i must|we must|going to|let's)\s+"
r"(?:tail|check|investigate|look at|see|tail|read|fetch|inspect|"
r"verify|diagnose|examine|debug|capture|grab|pull|view|run|call|"
r"trigger|launch|start|kick off|stop|kill|restart|adopt|serve|"
r"register|adopt|list|search|find|query|hit|ping|test)"
r"register|adopt|list|search|find|query|hit|ping|test|use|perform|do)"
r"\b[^.\n]{0,140}",
re.IGNORECASE,
)
@@ -2206,9 +2281,17 @@ async def stream_agent_loop(
elif _is_api_model:
# Filter schemas by RAG-selected tools (if available)
if _relevant_tools:
# _build_base_prompt unions _ADMIN_TOOLS into the prompt
# sections when admin intent fires — the schema list must
# offer the same names, or the model reads prose describing
# tools it cannot call and substitutes the nearest schema
# it does have (e.g. manage_memory for manage_skills).
_schema_names = set(_relevant_tools)
if _needs_admin:
_schema_names |= _ADMIN_TOOLS
base_schemas = [
s for s in FUNCTION_TOOL_SCHEMAS
if s.get("function", {}).get("name") in _relevant_tools
if s.get("function", {}).get("name") in _schema_names
]
_mcp_filtered = [
s for s in mcp_schemas
@@ -2254,6 +2337,7 @@ async def stream_agent_loop(
prompt_type=prompt_type if round_num == 1 else None,
tools=all_tool_schemas if all_tool_schemas else None,
timeout=agent_stream_timeout,
session_id=session_id,
):
if time.time() > _round_deadline:
logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
@@ -2743,6 +2827,46 @@ async def stream_agent_loop(
)
desc, result = await _tool_task
# A skill the model just loaded can prescribe tools that weren't
# RAG-selected this turn (declared via requires_toolsets in its
# frontmatter). Union them into the selection so the NEXT round's
# schema list includes them — otherwise the model reads "use
# grep" from the skill it fetched but has no grep schema to call.
if (
block.tool_type == "manage_skills"
and _relevant_tools is not None
and not result.get("error")
):
_ms_args = {}
_ms_raw = (block.content or "").strip()
if _ms_raw.startswith("{"):
try:
_ms_args = json.loads(_ms_raw)
except json.JSONDecodeError:
_ms_args = {}
_ms_name = str(_ms_args.get("name", "") or "").strip()
if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
try:
from services.memory.skills import SkillsManager as _SkM
from src.constants import DATA_DIR as _DD
from src.tool_policy import known_tool_names as _ktn
_known = _ktn()
for _sk in _SkM(_DD).load(owner=owner):
if _sk.get("name") == _ms_name:
_new = {
t for t in (_sk.get("requires_toolsets") or [])
if t in _known and t not in _relevant_tools
}
if _new:
_relevant_tools.update(_new)
logger.info(
"[tool-rag] skill '%s' unlocked tools for next round: %s",
_ms_name, sorted(_new),
)
break
except Exception as _e:
logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
# Extract structured web sources from web_search tool output.
# web_search returns {"output": ..., "exit_code": 0}; check "output"
# first so the <!-- SOURCES:…--> marker is found and stripped even
@@ -2833,18 +2957,20 @@ async def stream_agent_loop(
# On a bash/python timeout the result carries error + (often
# empty) stdout/stderr; fall back to the error so the "timed
# out" reason reaches the UI instead of a blank result.
output_text = (result["stdout"] or result["stderr"] or result.get("error", ""))[:2000]
raw = result["stdout"] or result["stderr"] or result.get("error", "")
output_text = _truncate(raw)
elif "output" in result:
# bash / python canonical result: {"output": ..., "exit_code": ...}
output_text = (result["output"] or "")[:2000]
raw = result["output"] or ""
output_text = _truncate(raw)
elif "response" in result:
# AI interaction tools (chat_with_model, send_to_session)
label = result.get("model", result.get("session_name", "AI"))
output_text = f"{label}: {result['response']}"[:4000]
output_text = _truncate(f"{label}: {result['response']}")
elif "content" in result:
output_text = result["content"][:2000]
output_text = _truncate(result["content"])
elif "results" in result:
output_text = result["results"][:4000]
output_text = _truncate(result["results"])
elif "session_id" in result and "name" in result:
output_text = f"Session created: {result['name']} (id: {result['session_id']})"
elif "success" in result:
@@ -2854,7 +2980,7 @@ async def stream_agent_loop(
else f"Error: {result.get('error', '')}"
)
elif "error" in result:
output_text = result["error"][:2000]
output_text = _truncate(result["error"])
# Emit tool_output (include ui_event data if present)
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
@@ -18,6 +18,30 @@ from src.tool_utils import _truncate, get_mcp_manager, set_mcp_manager
logger = logging.getLogger(__name__)
from .subprocess_tools import BashTool, PythonTool
from .web_tools import WebSearchTool, WebFetchTool
from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
TOOL_HANDLERS = {
"bash": BashTool().execute,
"python": PythonTool().execute,
"web_search": WebSearchTool().execute,
"web_fetch": WebFetchTool().execute,
"read_file": ReadFileTool().execute,
"write_file": WriteFileTool().execute,
"edit_file": EditFileTool().execute,
"ls": LsTool().execute,
"glob": GlobTool().execute,
"grep": GrepTool().execute,
"create_document": CreateDocumentTool().execute,
"update_document": UpdateDocumentTool().execute,
"edit_document": EditDocumentTool().execute,
"suggest_document": SuggestDocumentTool().execute,
"manage_documents": ManageDocumentTool().execute,
"get_workspace": GetWorkspaceTool().execute,
}
# ---------------------------------------------------------------------------
# Constants (re-exported for backward compatibility — single source of truth
# is src.constants; always prefer importing from there for new code)
@@ -28,7 +52,7 @@ PYTHON_TIMEOUT = 30
# Tool types that trigger execution
TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
"grep", "glob", "ls",
"grep", "glob", "ls", "get_workspace",
"create_document", "update_document", "edit_document",
"search_chats",
"chat_with_model", "create_session", "list_sessions",
@@ -92,15 +116,14 @@ from src.tool_execution import ( # noqa: E402, F401
format_tool_result,
)
# Document functions
from .document_tools import (
set_active_document,
set_active_model
)
# Implementations
from src.tool_implementations import ( # noqa: E402, F401
set_active_document,
set_active_model,
get_active_document,
do_create_document,
do_update_document,
do_edit_document,
do_suggest_document,
do_search_chats,
do_manage_skills,
do_manage_tasks,
@@ -108,7 +131,6 @@ from src.tool_implementations import ( # noqa: E402, F401
do_manage_mcp,
do_manage_webhooks,
do_manage_tokens,
do_manage_documents,
do_manage_settings,
do_api_call,
)
+644
View File
@@ -0,0 +1,644 @@
from typing import Any, Dict, List, Optional
import logging
import re
import json
from src.constants import MAX_READ_CHARS
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Active document state
# ---------------------------------------------------------------------------
_active_document_id: Optional[str] = None
_active_model: Optional[str] = None
def set_active_document(doc_id: Optional[str]):
"""Set the active document ID for document tool execution."""
global _active_document_id
_active_document_id = doc_id
def set_active_model(model: Optional[str]):
"""Set the current model name for version summaries."""
global _active_model
_active_model = model
def get_active_document():
return _active_document_id
def clear_active_document(doc_id: Optional[str] = None) -> bool:
"""Clear the in-memory active-document pointer.
With ``doc_id`` given, only clears when it matches the current pointer, so a
different active document is left untouched. Returns True if it was cleared.
Called when a document is detached from its session or deleted (its tab is
closed): without this, the stale pointer makes the last-resort doc-injection
path re-surface a closed document in a later, unrelated chat even one whose
session no longer matches because an unlinked doc has session_id NULL (#1160).
"""
global _active_document_id
if doc_id is None or _active_document_id == doc_id:
_active_document_id = None
return True
return False
def _owned_document_query(query, Document, owner: Optional[str]):
if owner is None:
# A bare Python `False` is not a valid SQL expression — SQLAlchemy 1.4
# deprecates it and 2.0 raises ArgumentError. Use the SQL `false()`
# literal to return zero rows for an unscoped (owner-less) query.
from sqlalchemy import false
return query.filter(false())
return query.filter(Document.owner == owner)
def _get_owned_document(db, Document, doc_id: str, owner: Optional[str], active_only: bool = False):
q = db.query(Document).filter(Document.id == doc_id)
if active_only:
q = q.filter(Document.is_active == True)
q = _owned_document_query(q, Document, owner)
return q.first()
def _most_recent_owned_document(db, Document, owner: Optional[str], active_only: bool = False):
q = db.query(Document)
if active_only:
q = q.filter(Document.is_active == True)
q = _owned_document_query(q, Document, owner)
return q.order_by(Document.updated_at.desc()).first()
# ---------------------------------------------------------------------------
# Document tools — create/update/edit/suggest living documents
# ---------------------------------------------------------------------------
def _sniff_doc_language(text: str) -> str:
"""Best-effort detect a document's language from its content when the model
didn't specify one. Defaults to 'markdown' (prose). Recognizes the common
markup/code types the editor supports so e.g. an SVG isn't saved as markdown."""
import json as _json, re as _re2
s = (text or "").strip()
if not s:
return "markdown"
head = s[:600]
hl = head.lower()
if _looks_like_email_document(s):
return "email"
# Markup (unambiguous)
if "<svg" in hl:
return "svg"
if hl.startswith("<?xml"):
return "xml"
if (hl.startswith("<!doctype html") or hl.startswith("<html")
or _re2.search(r"<(div|body|head|p|span|table|button|h[1-6]|ul|ol|li|img)\b", hl)):
return "html"
# JSON
if s[0] in "{[":
try:
_json.loads(s)
return "json"
except Exception:
pass
# Shebang
first = s.split("\n", 1)[0].strip().lower()
if first.startswith("#!"):
return "python" if "python" in first else "bash"
# Code by strong leading signals (line-anchored so prose with stray words won't match)
if _re2.search(r"(?m)^\s*(def \w|class \w|import \w|from \w[\w.]* import )", s):
return "python"
if _re2.search(r"(?m)^\s*(function \w|const \w|let \w|export |import .* from )", s):
return "javascript"
if _re2.search(r"(?mi)^\s*(select .* from |create table |insert into |update \w)", s):
return "sql"
if _re2.search(r"(?m)^[.#]?[\w-]+\s*\{[^{}]*:[^{}]*;", s):
return "css"
return "markdown"
def _looks_like_email_document(text: str = "", title: str = "") -> bool:
import re as _re
title_l = (title or "").strip().lower()
if title_l in {"new email", "new mail", "new message"}:
return True
s = (text or "").lstrip()
if "\n---\n" in s and _re.search(r"(?im)^To:\s*", s) and _re.search(r"(?im)^Subject:\s*", s):
return True
return bool(_re.search(r"(?im)^To:\s*", s) and _re.search(r"(?im)^Subject:\s*", s))
def _coerce_email_document_content(existing: str, incoming: str) -> str:
"""Keep email docs in the To/Subject/---/body shape even if a model writes
only the body or dumps header labels without the separator."""
import re as _re
old = existing or ""
new = (incoming or "").strip()
if "\n---\n" in new:
return new
header = old.split("\n---\n", 1)[0] if "\n---\n" in old else "To: \nSubject: "
if _looks_like_email_document(new):
lines = new.splitlines()
last_header_idx = -1
header_re = _re.compile(r"^(To|Cc|Bcc|Subject|In-Reply-To|References|X-Source-UID|X-Source-Folder|X-Attachments):", _re.I)
for i, line in enumerate(lines):
if header_re.match(line.strip()):
last_header_idx = i
body_lines = lines[last_header_idx + 1:] if last_header_idx >= 0 else lines
while body_lines and not body_lines[0].strip():
body_lines.pop(0)
body = "\n".join(body_lines).strip()
else:
body = new
return header.rstrip() + "\n---\n" + body
def _parse_tool_args(content):
"""Parse a tool-call argument blob.
Accepts either a JSON string or an already-decoded dict. Unwraps the
common `{"body": {...}}` envelope that smaller models emit when they
read tool descriptions like "Body is JSON: {...}" literally they
pass `body` as a field name rather than treating it as a noun.
Returns a dict on success, raises ValueError on bad JSON.
"""
if isinstance(content, str):
try:
args = json.loads(content) if content.strip() else {}
except (json.JSONDecodeError, TypeError) as e:
raise ValueError(str(e))
elif isinstance(content, dict):
args = content
else:
args = {}
# Unwrap {"body": {...}} envelope — but only if `body` is the sole key
# and points at a dict. We don't want to clobber a legitimate `body`
# field on tools where it's a real arg (e.g. send_email body text).
if (
isinstance(args, dict)
and len(args) == 1
and "body" in args
and isinstance(args["body"], dict)
and "action" in args["body"] # extra safety: only unwrap if the inner dict looks like a tool call
):
args = args["body"]
return args
def parse_edit_blocks(content: str) -> list:
"""Parse <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks."""
edits = []
pattern = r'<<<FIND>>>\n(.*?)\n<<<REPLACE>>>\n(.*?)\n<<<END>>>'
for m in re.finditer(pattern, content, re.DOTALL):
edits.append({"find": m.group(1), "replace": m.group(2)})
return edits
def parse_suggest_blocks(content: str) -> list:
"""Parse <<<FIND>>>...<<<SUGGEST>>>...<<<REASON>>>...<<<END>>> blocks."""
suggestions = []
_skip_phrases = ["no change", "clear", "fine as", "looks good", "no improvement", "keep as"]
pattern = r'<<<FIND>>>\n(.*?)\n<<<SUGGEST>>>\n(.*?)\n<<<REASON>>>\n(.*?)\n<<<END>>>'
for m in re.finditer(pattern, content, re.DOTALL):
find_text = m.group(1)
replace_text = m.group(2)
reason = m.group(3).strip()
# Skip no-op suggestions where find == replace or reason says no change
if find_text.strip() == replace_text.strip():
continue
if any(phrase in reason.lower() for phrase in _skip_phrases):
continue
suggestions.append({
"id": f"sugg-{len(suggestions)+1}",
"find": find_text,
"replace": replace_text,
"reason": reason,
})
return suggestions
class CreateDocumentTool:
async def execute(self, content: str, ctx: dict) -> dict:
"""Create a new document. Supports two formats:
1) Line-based: line 1 = title, line 2 (optional) = language, rest = content
2) XML-like tags: <title>...</title><language>...</language><content>...</content>
Some models mix them strip any XML-style tags and fall back to line parsing."""
import uuid, re as _re
from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession
raw = content or ""
session_id = ctx.get("session_id")
owner = ctx.get("owner")
# Known languages the editor understands (match the <select> in HTML)
_KNOWN_LANGS = {
"python", "javascript", "typescript", "html", "css", "markdown", "json",
"yaml", "bash", "sql", "rust", "go", "java", "c", "cpp", "xml", "toml",
"ini", "ruby", "php", "csv", "email", "text", "plain", "svg",
}
# Try XML tag extraction first
title = None
language = None
content = None
mt = _re.search(r"<title>\s*(.*?)\s*</title>", raw, _re.DOTALL | _re.IGNORECASE)
ml = _re.search(r"<language>\s*(.*?)\s*</language>", raw, _re.DOTALL | _re.IGNORECASE)
mc = _re.search(r"<content>\s*(.*?)\s*</content>", raw, _re.DOTALL | _re.IGNORECASE)
if mt or mc:
title = mt.group(1).strip() if mt else None
language = ml.group(1).strip().lower() if ml else None
content = mc.group(1) if mc else None
# Fall back to line-based parsing. First strip any stray XML-ish tags.
if title is None or content is None:
cleaned = _re.sub(r"</?(?:title|language|content)>", "", raw)
lines = cleaned.strip().split("\n")
if title is None:
title = lines[0].strip() if lines else "Untitled"
lines = lines[1:]
# Only consume second line as language if it looks like a valid short lang token
if language is None and lines:
candidate = lines[0].strip().lower()
if candidate and len(candidate) < 20 and " " not in candidate and candidate in _KNOWN_LANGS:
language = candidate
lines = lines[1:]
if content is None:
content = "\n".join(lines)
# Validate language: must be in known set, else default based on content
if language and language not in _KNOWN_LANGS:
language = None
if not language:
# No explicit language — sniff it from the content so an SVG / HTML / JSON
# / code document isn't silently saved as markdown. Prose → markdown.
language = _sniff_doc_language(content)
if _looks_like_email_document(content, title):
language = "email"
if not title:
title = "Untitled"
if not session_id:
return {"error": "No session context for document creation"}
db = SessionLocal()
try:
doc_id = str(uuid.uuid4())
ver_id = str(uuid.uuid4())
# Inherit ownership from the chat session so the doc survives that
# session later being deleted (session_id → NULL).
_sess = db.query(DbSession).filter(DbSession.id == session_id).first()
if owner is not None and (not _sess or _sess.owner != owner):
return {"error": "Cannot create document in another user's session"}
_owner = _sess.owner if _sess else None
doc = Document(
id=doc_id,
session_id=session_id,
title=title,
language=language,
current_content=content,
version_count=1,
is_active=True,
owner=_owner,
)
ver = DocumentVersion(
id=ver_id,
document_id=doc_id,
version_number=1,
content=content,
summary=f"Created by {_active_model or 'AI'}",
source="ai",
)
db.add(doc)
db.add(ver)
db.commit()
set_active_document(doc_id)
try:
from src.event_bus import fire_event
fire_event("document_created", _owner)
except Exception:
logger.debug("document_created event dispatch failed", exc_info=True)
return {
"action": "create",
"doc_id": doc_id,
"title": title,
"language": language,
"content": content,
"version": 1,
}
except Exception as e:
db.rollback()
return {"error": f"Failed to create document: {e}"}
finally:
db.close()
class UpdateDocumentTool:
async def execute(self, content: str, ctx: dict) -> Dict:
"""Update an existing document. Content = full new document text."""
import uuid
from src.database import SessionLocal, Document, DocumentVersion
target_id = ctx.get("doc_id", None) or _active_document_id
owner = ctx.get("owner")
db = SessionLocal()
try:
doc = None
if target_id:
doc = _get_owned_document(db, Document, target_id, owner)
if not doc:
doc = _most_recent_owned_document(db, Document, owner)
if doc:
target_id = doc.id
set_active_document(target_id)
logger.info(f"update_document: fell back to most recent doc id={target_id}")
if not doc:
return {"error": "No documents exist to update"}
is_email_doc = doc.language == "email" or _looks_like_email_document(doc.current_content or "", doc.title or "")
new_content = _coerce_email_document_content(doc.current_content or "", content) if is_email_doc else content.strip()
if is_email_doc:
doc.language = "email"
new_ver = doc.version_count + 1
ver = DocumentVersion(
id=str(uuid.uuid4()),
document_id=target_id,
version_number=new_ver,
content=new_content,
summary=f"Updated by {_active_model or 'AI'}",
source="ai",
)
doc.current_content = new_content
doc.version_count = new_ver
db.add(ver)
db.commit()
return {
"action": "update",
"doc_id": target_id,
"title": doc.title,
"language": doc.language,
"content": new_content,
"version": new_ver,
}
except Exception as e:
db.rollback()
return {"error": f"Failed to update document: {e}"}
finally:
db.close()
class EditDocumentTool:
async def execute(self, content: str, ctx: dict) -> Dict:
"""Apply targeted FIND/REPLACE edits to an existing document."""
import uuid
from src.database import SessionLocal, Document, DocumentVersion
target_id = ctx.get("doc_id", None) or _active_document_id
owner = ctx.get("owner")
edits = parse_edit_blocks(content)
if not edits:
return {"error": "No valid <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks found"}
db = SessionLocal()
try:
doc = None
if target_id:
doc = _get_owned_document(db, Document, target_id, owner)
if not doc:
# Fallback: most recently updated document. Avoids "no active doc" errors
# after server restart or when the agent loses track of which doc to edit.
doc = _most_recent_owned_document(db, Document, owner)
if doc:
target_id = doc.id
set_active_document(target_id)
logger.info(f"edit_document: fell back to most recent doc id={target_id} title={doc.title!r}")
if not doc:
return {"error": "No documents exist to edit"}
updated_content = doc.current_content
applied = 0
skipped = 0
for edit in edits:
_find = edit["find"]
if _find in updated_content:
updated_content = updated_content.replace(_find, edit["replace"], 1)
applied += 1
else:
# Defensive: the active-doc context shows a "N\t" line-number
# gutter for reference. Weaker models sometimes copy that prefix
# into FIND. If the exact match failed, retry with a leading
# "<digits><tab>" stripped from each FIND line — but only use it
# when that stripped form actually matches, so we never corrupt a
# legitimately tab-prefixed document.
_stripped = "\n".join(re.sub(r"^\d+\t", "", _l) for _l in _find.split("\n"))
if _stripped != _find and _stripped in updated_content:
updated_content = updated_content.replace(_stripped, edit["replace"], 1)
applied += 1
logger.info("edit_document: matched after stripping line-number gutter from FIND")
else:
logger.warning(f"edit_document: FIND text not found, skipping: {_find[:80]!r}")
skipped += 1
if applied == 0:
return {"error": f"No edits applied — none of the FIND blocks matched the document content (skipped {skipped})"}
new_ver = doc.version_count + 1
ver = DocumentVersion(
id=str(uuid.uuid4()),
document_id=target_id,
version_number=new_ver,
content=updated_content,
summary=f"Edited by {_active_model or 'AI'} ({applied} edit(s))",
source="ai",
)
doc.current_content = updated_content
doc.version_count = new_ver
db.add(ver)
db.commit()
return {
"action": "edit",
"doc_id": target_id,
"title": doc.title,
"language": doc.language,
"content": updated_content,
"version": new_ver,
"applied": applied,
"skipped": skipped,
}
except Exception as e:
db.rollback()
return {"error": f"Failed to edit document: {e}"}
finally:
db.close()
class SuggestDocumentTool:
async def execute(self, content: str, ctx: dict) -> Dict:
"""Create inline suggestions for the active document WITHOUT modifying it."""
from src.database import SessionLocal, Document
target_id = ctx.get("doc_id", None) or _active_document_id
owner = ctx.get("owner")
if not target_id:
return {"error": "No active document to suggest on"}
suggestions = parse_suggest_blocks(content)
if not suggestions:
return {"error": "No valid <<<FIND>>>...<<<SUGGEST>>>...<<<REASON>>>...<<<END>>> blocks found"}
db = SessionLocal()
try:
doc = _get_owned_document(db, Document, target_id, owner)
if not doc:
return {"error": f"Document {target_id} not found"}
# Validate that FIND text exists in document
valid = []
for s in suggestions:
if s["find"] in doc.current_content:
valid.append(s)
else:
logger.warning(f"suggest_document: FIND text not found, skipping: {s['find'][:80]!r}")
if not valid:
return {"error": "No suggestions matched the document content"}
return {
"action": "suggest",
"doc_id": target_id,
"suggestions": valid,
"count": len(valid),
}
finally:
db.close()
# ---------------------------------------------------------------------------
# Document management tool (delete, list, organize)
# ---------------------------------------------------------------------------
class ManageDocumentTool:
async def execute(self, content: str, ctx: dict) -> Dict:
"""Manage documents: list, read/view/open, delete, tidy.
Output format mirrors `manage_session`: list rows include a
clickable `[Title](#document-<id>)` anchor + relative timestamps
so the user can click straight from chat to open the editor.
"""
from core.database import SessionLocal, Document
from datetime import datetime, timezone
owner = ctx.get("owner")
try:
args = _parse_tool_args(content)
except ValueError:
return {"error": "Invalid JSON arguments", "exit_code": 1}
action = args.get("action", "list")
db = SessionLocal()
def _rel(ts):
if not ts:
return 'never'
try:
now = datetime.now(timezone.utc) if ts.tzinfo is not None else datetime.utcnow()
diff = (now - ts).total_seconds()
except Exception:
return 'unknown'
if diff < 60: return 'just now'
if diff < 3600: return f'{int(diff / 60)}m ago'
if diff < 86400: return f'{int(diff / 3600)}h ago'
if diff < 86400 * 7: return f'{int(diff / 86400)}d ago'
return ts.strftime('%Y-%m-%d')
try:
if action == "list":
q = db.query(Document).filter(Document.is_active == True)
q = _owned_document_query(q, Document, owner)
if args.get("search"):
q = q.filter(Document.title.ilike(f"%{args['search']}%"))
if args.get("language"):
q = q.filter(Document.language == args["language"])
docs = q.order_by(Document.updated_at.desc()).limit(args.get("limit", 50)).all()
if not docs:
msg = "No documents found" + (f" matching '{args['search']}'" if args.get("search") else "") + "."
return {"response": msg, "documents": [], "exit_code": 0}
lines = []
items = []
for i, d in enumerate(docs):
size = len(d.current_content or "")
lang = d.language or "text"
ts = getattr(d, 'updated_at', None) or getattr(d, 'created_at', None)
marker = " ← most recent" if i == 0 else ""
lines.append(
f"- [{d.title}](#document-{d.id}) — {lang}, {size} chars, updated {_rel(ts)}{marker}"
)
items.append({"id": d.id, "title": d.title, "language": lang, "size": size})
header = f"Found {len(docs)} document(s), sorted most-recent first. Click a title to open:"
return {
"response": header + "\n" + "\n".join(lines),
"documents": items,
"exit_code": 0,
}
elif action in ("read", "view", "open", "get"):
doc_id = args.get("document_id") or args.get("id") or args.get("uid")
if not doc_id:
return {"error": "Need document_id (use action=list to find one)", "exit_code": 1}
doc = _get_owned_document(db, Document, doc_id, owner, active_only=True)
if not doc:
return {"error": f"Document '{doc_id}' not found", "exit_code": 1}
body = doc.current_content or ""
preview_limit = int(args.get("limit", MAX_READ_CHARS))
truncated = len(body) > preview_limit
preview = body[:preview_limit] + (f"\n... (truncated, {len(body)} chars total)" if truncated else "")
anchor = f"[{doc.title}](#document-{doc.id})"
return {
"response": f"{anchor} — click to open in editor.\n\n```{doc.language or ''}\n{preview}\n```",
"document": {
"id": doc.id,
"title": doc.title,
"language": doc.language,
"size": len(body),
"content": preview,
"truncated": truncated,
},
"exit_code": 0,
}
elif action == "delete":
doc_id = args.get("document_id") or args.get("id") or args.get("uid") or _active_document_id
doc = None
if doc_id:
doc = _get_owned_document(db, Document, doc_id, owner)
if not doc:
# Fallback: most recently updated doc (likely what the user means)
doc = _most_recent_owned_document(db, Document, owner, active_only=True)
if not doc:
return {"error": "No document to delete", "exit_code": 1}
title = doc.title
doc.is_active = False
db.commit()
if _active_document_id == doc.id:
set_active_document(None)
return {"response": f"Deleted document '{title}'", "exit_code": 0}
elif action == "tidy":
from src.document_actions import run_document_tidy
result = await run_document_tidy(owner or "")
return {"response": result, "exit_code": 0}
else:
return {"error": f"Unknown action: {action}", "exit_code": 1}
except Exception as e:
logger.error(f"manage_documents error: {e}")
return {"error": str(e), "exit_code": 1}
finally:
db.close()
+398
View File
@@ -0,0 +1,398 @@
import asyncio
import json
import os
import difflib
import fnmatch
import shutil
from typing import Optional, Dict, Any, Tuple
from src.constants import MAX_READ_CHARS, MAX_DIFF_LINES, MAX_OUTPUT_CHARS
_CODENAV_SKIP_DIRS = frozenset({
".git", ".hg", ".svn", "node_modules", "venv", ".venv", "__pycache__",
".mypy_cache", ".pytest_cache", ".ruff_cache", "dist", "build",
".next", ".cache", "site-packages", ".idea", ".tox",
})
_CODENAV_MAX_HITS = 200
_CODENAV_MAX_LINE = 400
def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
if old == new:
return None
old_lines = old.splitlines()
new_lines = new.splitlines()
label = path or "file"
diff_lines = list(difflib.unified_diff(
old_lines, new_lines,
fromfile=f"a/{label}", tofile=f"b/{label}",
lineterm="",
))
added = sum(1 for line in diff_lines if line.startswith("+") and not line.startswith("+++"))
removed = sum(1 for line in diff_lines if line.startswith("-") and not line.startswith("---"))
truncated = False
if len(diff_lines) > MAX_DIFF_LINES:
diff_lines = diff_lines[:MAX_DIFF_LINES]
truncated = True
text = "\n".join(diff_lines)
if truncated:
text += f"\n… diff truncated at {MAX_DIFF_LINES} lines"
return {
"text": text,
"added": added,
"removed": removed,
"new_file": old == "",
"file": os.path.basename(path) or (path or "file"),
}
class EditFileTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
try:
args = json.loads(content) if content.strip().startswith("{") else {}
except (json.JSONDecodeError, TypeError):
args = {}
raw_path = (args.get("path") or "").strip()
old = args.get("old_string", "")
new = args.get("new_string", "")
replace_all = bool(args.get("replace_all", False))
if not raw_path:
return {"error": "edit_file: path required", "exit_code": 1}
try:
path = _resolve_tool_path(raw_path)
except ValueError as e:
return {"error": f"edit_file: {e}", "exit_code": 1}
if old == "":
return {"error": "edit_file: old_string required (use write_file to create a file)", "exit_code": 1}
if old == new:
return {"error": "edit_file: old_string and new_string are identical", "exit_code": 1}
def _apply():
"""Helper function that performs the actual string replacement and file writing logic."""
with open(path, "r", encoding="utf-8") as f:
original = f.read()
count = original.count(old)
if count == 0:
return original, None, "not_found"
if count > 1 and not replace_all:
return original, None, f"not_unique:{count}"
updated = original.replace(old, new) if replace_all else original.replace(old, new, 1)
with open(path, "w", encoding="utf-8") as f:
f.write(updated)
return original, updated, "ok"
try:
original, updated, status = await asyncio.to_thread(_apply)
except FileNotFoundError:
return {"error": f"edit_file: {path}: not found (use write_file to create it)", "exit_code": 1}
except (IsADirectoryError, UnicodeDecodeError):
return {"error": f"edit_file: {path}: not an editable text file", "exit_code": 1}
except PermissionError:
return {"error": f"edit_file: {path}: permission denied", "exit_code": 1}
except OSError as e:
return {"error": f"edit_file: {path}: {e}", "exit_code": 1}
if status == "not_found":
return {"error": f"edit_file: old_string not found in {path}. Read the file and match it exactly.", "exit_code": 1}
if status.startswith("not_unique"):
n = status.split(":", 1)[1]
return {"error": f"edit_file: old_string is not unique in {path} ({n} matches). Add surrounding context or set replace_all=true.", "exit_code": 1}
n = original.count(old)
result = {"output": f"Edited {path} ({n} replacement{'s' if n != 1 else ''})", "exit_code": 0}
diff = _unified_diff(original, updated, path)
if diff:
result["diff"] = diff
return result
class ReadFileTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
raw_path, offset, limit = content.split("\n", 1)[0].strip(), 0, 0
_stripped = content.strip()
if _stripped.startswith("{"):
try:
_a = json.loads(_stripped)
raw_path = str(_a.get("path", "")).strip()
offset = int(_a.get("offset") or 0)
limit = int(_a.get("limit") or 0)
except (json.JSONDecodeError, TypeError, ValueError):
pass
try:
path = _resolve_tool_path(raw_path)
except ValueError as e:
return {"error": f"read_file: {e}", "exit_code": 1}
try:
def _read():
if offset > 0 or limit > 0:
start = max(offset, 1)
out, n, budget = [], 0, MAX_READ_CHARS
with open(path, "r", encoding="utf-8", errors="replace") as f:
for i, line in enumerate(f, 1):
if i < start:
continue
if limit > 0 and n >= limit:
break
out.append(line)
n += 1
budget -= len(line)
if budget <= 0:
out.append(f"\n... [truncated at {MAX_READ_CHARS} chars]")
break
return "".join(out)
with open(path, "r", encoding="utf-8", errors="replace") as f:
return f.read(MAX_READ_CHARS + 1)
data = await asyncio.to_thread(_read)
except FileNotFoundError:
return {"error": f"read_file: {path}: not found", "exit_code": 1}
except PermissionError:
return {"error": f"read_file: {path}: permission denied", "exit_code": 1}
except IsADirectoryError:
return {"error": f"read_file: {path}: is a directory (use ls)", "exit_code": 1}
except OSError as e:
return {"error": f"read_file: {path}: {e}", "exit_code": 1}
if not (offset > 0 or limit > 0) and len(data) > MAX_READ_CHARS:
data = data[:MAX_READ_CHARS] + f"\n... [truncated at {MAX_READ_CHARS} chars]"
return {"output": data, "exit_code": 0}
class WriteFileTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
lines = content.split("\n", 1)
raw_path = lines[0].strip()
body = lines[1] if len(lines) > 1 else ""
try:
path = _resolve_tool_path(raw_path)
except ValueError as e:
return {"error": f"write_file: {e}", "exit_code": 1}
try:
def _write():
old = ""
try:
with open(path, "r", encoding="utf-8") as f:
old = f.read()
except (FileNotFoundError, IsADirectoryError, UnicodeDecodeError, OSError):
old = ""
d = os.path.dirname(path)
if d:
os.makedirs(d, exist_ok=True)
with open(path, "w", encoding="utf-8") as f:
f.write(body)
return old, len(body)
old_content, size = await asyncio.to_thread(_write)
except PermissionError:
return {"error": f"write_file: {path}: permission denied", "exit_code": 1}
except OSError as e:
return {"error": f"write_file: {path}: {e}", "exit_code": 1}
diff = _unified_diff(old_content, body, path)
result = {"output": f"Wrote {size} bytes to {path}", "exit_code": 0}
if diff:
result["diff"] = diff
return result
class LsTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
raw_path = ""
_s = (content or "").strip()
if _s.startswith("{"):
try:
raw_path = str(json.loads(_s).get("path", "")).strip()
except json.JSONDecodeError:
raw_path = ""
else:
raw_path = _s.split("\n", 1)[0].strip()
try:
root = _resolve_search_root(raw_path)
except ValueError as e:
return {"error": f"ls: {e}", "exit_code": 1}
def _ls():
if not os.path.isdir(root):
return None, f"ls: {root}: not a directory"
rows = []
try:
with os.scandir(root) as it:
for entry in it:
if entry.name.startswith("."):
continue
try:
is_dir = entry.is_dir(follow_symlinks=False)
size = entry.stat(follow_symlinks=False).st_size if not is_dir else 0
except OSError:
continue
rows.append((is_dir, entry.name, size))
except (PermissionError, OSError) as _e:
return None, f"ls: {_e}"
rows.sort(key=lambda r: (not r[0], r[1].lower()))
lines = [f"{root}:"]
for is_dir, name, size in rows[:_CODENAV_MAX_HITS]:
lines.append(f" {name}/" if is_dir else f" {name} ({size} B)")
if len(rows) > _CODENAV_MAX_HITS:
lines.append(f" ... [{len(rows) - _CODENAV_MAX_HITS} more]")
if not rows:
lines.append(" (empty)")
return "\n".join(lines), None
out, err = await asyncio.to_thread(_ls)
if err:
return {"error": err, "exit_code": 1}
return {"output": _truncate(out), "exit_code": 0}
class GlobTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
args = {}
_s = (content or "").strip()
if _s.startswith("{"):
try:
args = json.loads(_s)
except json.JSONDecodeError:
args = {}
else:
args = {"pattern": _s}
pattern = str(args.get("pattern", "")).strip()
if not pattern:
return {"error": "glob: pattern is required", "exit_code": 1}
try:
root = _resolve_search_root(str(args.get("path", "")))
except ValueError as e:
return {"error": f"glob: {e}", "exit_code": 1}
def _glob():
from pathlib import Path
base = Path(root)
if not base.is_dir():
return None, f"glob: {root}: not a directory"
matched = []
try:
for p in base.rglob(pattern):
if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
continue
try:
mtime = p.stat().st_mtime
except OSError:
mtime = 0
matched.append((mtime, str(p)))
if len(matched) > _CODENAV_MAX_HITS * 5:
break
except (OSError, ValueError) as _e:
return None, f"glob: {_e}"
matched.sort(key=lambda t: t[0], reverse=True)
return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None
paths, err = await asyncio.to_thread(_glob)
if err:
return {"error": err, "exit_code": 1}
if not paths:
return {"output": f"No files matching {pattern!r} under {root}", "exit_code": 0}
out = "\n".join(paths)
if len(paths) >= _CODENAV_MAX_HITS:
out += f"\n... [capped at {_CODENAV_MAX_HITS} files]"
return {"output": _truncate(out), "exit_code": 0}
class GrepTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate
args: Dict[str, Any] = {}
_s = (content or "").strip()
if _s.startswith("{"):
try:
args = json.loads(_s)
except json.JSONDecodeError:
args = {}
else:
args = {"pattern": _s}
pattern = str(args.get("pattern", "")).strip()
if not pattern:
return {"error": "grep: pattern is required", "exit_code": 1}
ignore_case = bool(args.get("ignore_case"))
glob_pat = str(args.get("glob", "") or "").strip()
try:
max_hits = int(args.get("max_results") or _CODENAV_MAX_HITS)
except (TypeError, ValueError):
max_hits = _CODENAV_MAX_HITS
max_hits = max(1, min(max_hits, _CODENAV_MAX_HITS))
try:
root = _resolve_search_root(str(args.get("path", "")))
except ValueError as e:
return {"error": f"grep: {e}", "exit_code": 1}
def _grep():
import re as _re
import shutil
rg = shutil.which("rg")
if rg:
cmd = [rg, "--line-number", "--no-heading", "--color=never",
"--max-count", str(max_hits)]
if ignore_case:
cmd.append("--ignore-case")
if glob_pat:
cmd += ["--glob", glob_pat]
for _d in _CODENAV_SKIP_DIRS:
cmd += ["--glob", f"!**/{_d}/**"]
cmd += ["--regexp", pattern, root]
try:
import subprocess
p = subprocess.run(cmd, capture_output=True, text=True, timeout=20)
lines = [ln for ln in (p.stdout or "").splitlines() if ln][:max_hits]
return lines, None
except subprocess.TimeoutExpired:
return None, "grep: timed out"
except Exception as _e:
return None, f"grep: {_e}"
try:
rx = _re.compile(pattern, _re.IGNORECASE if ignore_case else 0)
except _re.error as _e:
return None, f"grep: bad pattern: {_e}"
hits = []
if os.path.isfile(root):
file_iter = [root]
else:
file_iter = []
for dp, dns, fns in os.walk(root):
dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
for fn in fns:
if glob_pat and not fnmatch.fnmatch(fn, glob_pat):
continue
file_iter.append(os.path.join(dp, fn))
for fp in file_iter:
if len(hits) >= max_hits:
break
try:
with open(fp, "r", encoding="utf-8", errors="strict") as f:
for i, line in enumerate(f, 1):
if rx.search(line):
hits.append(f"{fp}:{i}:{line.rstrip()[:_CODENAV_MAX_LINE]}")
if len(hits) >= max_hits:
break
except (UnicodeDecodeError, OSError):
continue
return hits, None
lines, err = await asyncio.to_thread(_grep)
if err:
return {"error": err, "exit_code": 1}
if not lines:
return {"output": f"No matches for {pattern!r} under {root}", "exit_code": 0}
out = "\n".join(ln[:_CODENAV_MAX_LINE] for ln in lines)
if len(lines) >= max_hits:
out += f"\n... [capped at {max_hits} matches]"
return {"output": _truncate(out), "exit_code": 0}
class GetWorkspaceTool:
"""Report the active workspace folder (no args). File tools are confined to
it; the shell starts there (cwd) but is NOT sandboxed."""
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import get_active_workspace
ws = get_active_workspace()
if ws:
return {
"output": f"{ws}\n(File tools are confined to this folder; the shell starts "
f"here but is not sandboxed and can reach outside it.)",
"exit_code": 0,
}
return {
"output": "No workspace is set. File tools use the default allowed roots; "
"resolve paths from the user or use absolute paths.",
"exit_code": 0,
}
+153
View File
@@ -0,0 +1,153 @@
import asyncio
import sys
import time
import collections
from typing import Optional, Callable, Awaitable, Tuple, Dict
from src.constants import MAX_OUTPUT_CHARS
DEFAULT_BASH_TIMEOUT = 60 * 60 # 1 hour
DEFAULT_PYTHON_TIMEOUT = 60 * 60
PROGRESS_INTERVAL_S = 2.0
PROGRESS_TAIL_LINES = 12
async def _run_subprocess_streaming(
proc: asyncio.subprocess.Process,
*,
timeout: float,
progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
) -> Tuple[str, str, Optional[int], bool]:
started = time.time()
stdout_full: list[str] = []
stderr_full: list[str] = []
tail = collections.deque(maxlen=PROGRESS_TAIL_LINES)
async def _reader(stream, full_buf, label: str):
if stream is None:
return
while True:
line = await stream.readline()
if not line:
break
decoded = line.decode("utf-8", errors="replace").rstrip("\n")
full_buf.append(decoded)
if label == "err":
tail.append(f"! {decoded}")
else:
tail.append(decoded)
async def _progress_emitter():
await asyncio.sleep(PROGRESS_INTERVAL_S)
while True:
if progress_cb:
try:
await progress_cb({
"elapsed_s": round(time.time() - started, 1),
"tail": "\n".join(list(tail)),
})
except Exception:
pass
await asyncio.sleep(PROGRESS_INTERVAL_S)
rd_out = asyncio.create_task(_reader(proc.stdout, stdout_full, "out"))
rd_err = asyncio.create_task(_reader(proc.stderr, stderr_full, "err"))
prog_task = asyncio.create_task(_progress_emitter()) if progress_cb else None
timed_out = False
try:
await asyncio.wait_for(proc.wait(), timeout=timeout)
except asyncio.TimeoutError:
timed_out = True
try:
proc.kill()
except Exception:
pass
try:
await asyncio.wait_for(proc.wait(), timeout=2)
except Exception:
pass
except asyncio.CancelledError:
try:
proc.kill()
except Exception:
pass
try:
await asyncio.wait_for(proc.wait(), timeout=2)
except Exception:
pass
for t in (rd_out, rd_err):
t.cancel()
if prog_task is not None:
prog_task.cancel()
raise
finally:
if prog_task is not None and not prog_task.done():
prog_task.cancel()
try:
await prog_task
except (asyncio.CancelledError, Exception):
pass
for t in (rd_out, rd_err):
try:
await asyncio.wait_for(t, timeout=1)
except Exception:
pass
return (
"\n".join(stdout_full),
"\n".join(stderr_full),
proc.returncode,
timed_out,
)
class BashTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import agent_cwd, _truncate
progress_cb = ctx.get("progress_cb")
_subproc_env = ctx.get("subproc_env")
proc = await asyncio.create_subprocess_shell(
content,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=_subproc_env,
cwd=agent_cwd(),
)
stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
proc,
timeout=DEFAULT_BASH_TIMEOUT,
progress_cb=progress_cb,
)
if timed_out:
return {"error": f"bash: timed out after {DEFAULT_BASH_TIMEOUT}s — process killed", "exit_code": 124, "stdout": _truncate(stdout, MAX_OUTPUT_CHARS), "stderr": _truncate(stderr, MAX_OUTPUT_CHARS)}
output = stdout.rstrip()
err = stderr.rstrip()
if err:
output = (output + "\nSTDERR: " + err).strip() if output else "STDERR: " + err
output = _truncate(output, MAX_OUTPUT_CHARS)
return {"output": output or "(no output)", "exit_code": rc or 0}
class PythonTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.tool_execution import agent_cwd, _truncate
progress_cb = ctx.get("progress_cb")
_subproc_env = ctx.get("subproc_env")
proc = await asyncio.create_subprocess_exec(
(sys.executable or "python"), "-I", "-c", content,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=_subproc_env,
cwd=agent_cwd(),
)
stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
proc,
timeout=DEFAULT_PYTHON_TIMEOUT,
progress_cb=progress_cb,
)
if timed_out:
return {"error": f"python: timed out after {DEFAULT_PYTHON_TIMEOUT}s — process killed", "exit_code": 124, "stdout": _truncate(stdout, MAX_OUTPUT_CHARS), "stderr": _truncate(stderr, MAX_OUTPUT_CHARS)}
output = stdout.rstrip()
err = stderr.rstrip()
if err:
output = (output + "\nSTDERR: " + err).strip() if output else "STDERR: " + err
output = _truncate(output, MAX_OUTPUT_CHARS)
return {"output": output or "(no output)", "exit_code": rc or 0}
+101
View File
@@ -0,0 +1,101 @@
import asyncio
import json
from typing import Dict, Any
from src.constants import MAX_OUTPUT_CHARS
class WebSearchTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.search import comprehensive_web_search
raw = content.strip()
query = raw
time_filter = None
max_pages = 5
if raw.startswith("{"):
try:
parsed = json.loads(raw)
if isinstance(parsed, dict) and "query" in parsed:
query = str(parsed.get("query", "")).strip()
tf = parsed.get("time_filter") or parsed.get("freshness")
if isinstance(tf, str) and tf.lower() in ("day", "week", "month", "year"):
time_filter = tf.lower()
mp = parsed.get("max_pages")
if isinstance(mp, int) and 1 <= mp <= 10:
max_pages = mp
except json.JSONDecodeError:
pass
if not query:
query = raw.split("\n")[0].strip()
if time_filter is None:
q_lc = query.lower()
if any(kw in q_lc for kw in ("today", "latest", "breaking", "this morning", "right now", "currently")):
time_filter = "day"
elif any(kw in q_lc for kw in ("this week", "past week", "recent news", "last few days")):
time_filter = "week"
elif any(kw in q_lc for kw in ("this month", "past month")):
time_filter = "month"
elif " news" in q_lc or q_lc.startswith("news ") or q_lc.endswith(" news"):
time_filter = "week"
loop = asyncio.get_running_loop()
text, sources = await asyncio.wait_for(
loop.run_in_executor(
None,
lambda: comprehensive_web_search(
query,
max_pages=max_pages,
time_filter=time_filter,
return_sources=True,
),
),
timeout=30,
)
output = text[:MAX_OUTPUT_CHARS] if len(text) > MAX_OUTPUT_CHARS else text
if sources:
output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
return {"output": output, "exit_code": 0}
class WebFetchTool:
async def execute(self, content: str, ctx: dict) -> dict:
from src.search.content import fetch_webpage_content
raw = content.strip()
url = ""
if raw.startswith("{"):
try:
parsed = json.loads(raw)
if isinstance(parsed, dict):
url = str(parsed.get("url") or "").strip()
except json.JSONDecodeError:
url = ""
if not url:
url = raw.split("\n")[0].strip()
if not url or url.startswith("{") or any(c in url for c in (" ", "\t", "\n")):
return {"error": "web_fetch: provide a single URL or domain, e.g. example.com", "exit_code": 1}
low = url.lower()
if "://" in low and not low.startswith(("http://", "https://")):
return {"error": f"web_fetch: unsupported URL scheme (only http/https): {url[:80]}", "exit_code": 1}
if not low.startswith(("http://", "https://")):
url = "https://" + url
loop = asyncio.get_running_loop()
try:
result = await asyncio.wait_for(
loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
timeout=30,
)
except asyncio.TimeoutError:
return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1}
except Exception as e:
return {"error": f"web_fetch: {url}: {e}", "exit_code": 1}
err = result.get("error")
text = (result.get("content") or "").strip()
title = result.get("title") or ""
if not text:
if err:
return {"error": f"web_fetch: {url}: {err}", "exit_code": 1}
return {"error": f"web_fetch: {url}: no readable text content (not HTML, or the page needs JS/login)", "exit_code": 1}
header = (f"# {title}\n" if title else "") + f"Source: {url}\n\n"
output = header + text
if len(output) > MAX_OUTPUT_CHARS:
output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
return {"output": output, "exit_code": 0}
+9 -4
View File
@@ -24,7 +24,9 @@ MAX_PIPELINE_STEPS = 10
# ---------------------------------------------------------------------------
# Global managers (set from app.py, same pattern as _mcp_manager)
# ---------------------------------------------------------------------------
# _session_manager is kept as a local cache for performance (avoiding
# repeated get_session_manager_instance() calls). It's synced with
# the authoritative singleton in core.models.
_session_manager = None
_memory_manager = None
_memory_vector = None
@@ -33,11 +35,15 @@ _personal_docs_manager = None
def set_session_manager(mgr):
"""Set the global session manager. Syncs local cache + core singleton."""
global _session_manager
_session_manager = mgr
from core.models import set_session_manager_instance
set_session_manager_instance(mgr)
def get_session_manager():
"""Get the global session manager."""
return _session_manager
@@ -966,16 +972,15 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
memories = [m for m in memories if m.get("category", "").lower() == category_filter]
if not memories:
return {"results": "No memories found" + (f" in category '{category_filter}'" if category_filter else "") + "."}
result_lines = [f"Found {len(memories)} memory entries:\n"]
for m in memories[:100]:
for m in memories:
cat = m.get("category", "fact")
mid = m.get("id", "?")[:8]
text = m.get("text", "")
if len(text) > 150:
text = text[:150] + "..."
result_lines.append(f"- [{cat}] `{mid}` — {text}")
if len(memories) > 100:
result_lines.append(f"... and {len(memories) - 100} more")
return {"results": "\n".join(result_lines)}
elif action == "add":
+16 -2
View File
@@ -4,6 +4,8 @@ import logging
from typing import Dict
from cryptography.fernet import Fernet, InvalidToken
from core.platform_compat import safe_chmod
logger = logging.getLogger(__name__)
class APIKeyManager:
@@ -15,12 +17,20 @@ class APIKeyManager:
def get_or_create_key(self) -> bytes:
"""Get or create encryption key for API keys"""
if os.path.exists(self.key_file):
# Older versions wrote .key with the process umask (often 0o644,
# i.e. group/world-readable). Re-restrict on read so existing
# installs heal without needing the key to be regenerated.
safe_chmod(self.key_file, 0o600)
with open(self.key_file, 'rb') as f:
return f.read()
else:
key = Fernet.generate_key()
with open(self.key_file, 'wb') as f:
f.write(key)
# This key decrypts every stored provider credential, so restrict it
# to the owner (0o600) — it must not be group/world-readable. No-op
# on Windows (files there are ACL-restricted to the user already).
safe_chmod(self.key_file, 0o600)
return key
def encrypt_api_key(self, api_key: str) -> str:
@@ -57,7 +67,12 @@ class APIKeyManager:
# Legacy/wrong shape (e.g. a list) — .items() would raise. Ignore it.
logger.warning("API keys file has unexpected shape (%s); ignoring", type(encrypted_keys).__name__)
return {}
return encrypted_keys
return {
str(provider): key
for provider, key in encrypted_keys.items()
if isinstance(key, str)
}
def save(self, provider: str, api_key: str):
"""Save encrypted API key to file.
@@ -82,4 +97,3 @@ class APIKeyManager:
except (InvalidToken, ValueError) as e:
logger.warning("Failed to decrypt API key for %s: %s", provider, e)
return decrypted
+2
View File
@@ -55,6 +55,8 @@ async def _drain_agent(sess, messages):
if "delta" in d:
delta = d.get("delta")
if isinstance(delta, str):
if d.get("thinking"):
continue
full += delta
elif d.get("type") == "agent_step":
round_num = d.get("round", round_num)
+32 -16
View File
@@ -579,6 +579,24 @@ def _classify_event_heuristic(summary: str) -> tuple:
return etype, None
def _memory_context_lines(mems, limit: int = 40) -> list:
"""Render Memory rows into short personal-context bullets for event classify.
Reads the Memory ORM `text` column. The previous inline code read a
non-existent `content` attribute, so it raised AttributeError on the first
row, the surrounding except swallowed it, and the classifier ran with no
personal context at all. getattr keeps it robust to future schema drift.
"""
lines: list = []
for m in mems:
c = (getattr(m, "text", "") or "").strip()
if c:
lines.append(f"- {c[:200]}")
if len(lines) >= limit:
break
return lines
async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
"""Hybrid classification of upcoming calendar events: fast heuristic for
obvious cases, LLM fallback for ambiguous ones. Assigns event_type +
@@ -614,16 +632,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
try:
from core.database import Memory as _Mem
_mems = db.query(_Mem).filter(_Mem.owner == owner).limit(60).all() if owner else []
if _mems:
_lines = []
for m in _mems:
c = (m.content or "").strip()
if c:
_lines.append(f"- {c[:200]}")
if _lines:
_memory_context = "USER CONTEXT (relationships, work, life):\n" + "\n".join(_lines[:40]) + "\n\n"
_lines = _memory_context_lines(_mems)
if _lines:
_memory_context = "USER CONTEXT (relationships, work, life):\n" + "\n".join(_lines) + "\n\n"
except Exception as _me:
logger.debug(f"Could not load memory for classify: {_me}")
logger.warning(f"Could not load memory for classify: {_me}")
classified_h = 0
classified_llm = 0
@@ -796,14 +809,14 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
import email as _email_mod
import asyncio as _aio
from datetime import datetime as _dt, timedelta as _td
from routes.email_helpers import _imap_connect, SCHEDULED_DB
from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
# 1. Pull recent UIDs + From headers cheaply (header-only fetch).
def _pull_headers():
results = []
conn = _imap_connect(None)
conn = _imap_connect(None, owner=owner)
try:
conn.select("INBOX", readonly=True)
status, data = conn.search(None, "ALL")
@@ -855,9 +868,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
# 3. Eligibility: ≥3 emails AND (no cache OR cache > 30 days old).
try:
conn = _sql3.connect(SCHEDULED_DB)
owner_clause, owner_params = _email_cache_owner_clause(owner)
cached = {
r[0]: r[1] for r in conn.execute(
"SELECT from_address, last_built_at FROM sender_signatures"
f"SELECT from_address, last_built_at FROM sender_signatures WHERE {owner_clause}",
owner_params,
).fetchall()
}
conn.close()
@@ -888,7 +903,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
def _fetch_bodies(_msgs):
bodies = []
conn2 = _imap_connect(None)
conn2 = _imap_connect(None, owner=owner)
try:
conn2.select("INBOX", readonly=True)
for mm in _msgs:
@@ -965,11 +980,12 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
try:
conn = _sql3.connect(SCHEDULED_DB)
owner_value = (owner or "").strip()
conn.execute(
"INSERT OR REPLACE INTO sender_signatures "
"(from_address, signature_text, sample_count, last_built_at, model_used, source) "
"VALUES (?, ?, ?, ?, ?, ?)",
(addr, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
"(from_address, owner, signature_text, sample_count, last_built_at, model_used, source) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(addr, owner_value, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
)
conn.commit()
conn.close()
+84 -6
View File
@@ -5,11 +5,13 @@ Auto-registration of built-in MCP servers on startup.
Each server runs as a stdio subprocess managed by McpManager.
"""
import asyncio
import json
import logging
import os
import shutil
import subprocess
import sys
import asyncio
from core.platform_compat import IS_WINDOWS, which_tool
@@ -196,18 +198,29 @@ def _npx_package_from_args(args):
async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
"""Probe whether an npx package is already in the local cache.
Runs `npx --no-install <pkg> --version`. --no-install tells npx to
fail instead of downloading, so a cache miss returns fast. We treat
"exited 0 with non-empty stdout" as proof of a working cached copy.
Anything else (non-zero exit, empty stdout, timeout, missing npx,
network error) means we should skip the server.
First checks the local `_npx` cache for an installed package. If the
package is not found there, falls back to `npx --no-install <pkg>
--version` so older npm layouts still work without downloading.
"""
if _is_package_in_npx_cache(package_spec):
return True
try:
proc = await asyncio.create_subprocess_exec(
npx_path, "--no-install", package_spec, "--version",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
except NotImplementedError:
try:
result = subprocess.run(
[npx_path, "--no-install", package_spec, "--version"],
capture_output=True,
timeout=timeout_s,
)
except (subprocess.TimeoutExpired, OSError, ValueError):
return False
return result.returncode == 0 and bool(result.stdout.strip())
except (OSError, ValueError):
return False
try:
@@ -220,3 +233,68 @@ async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
pass
return False
return proc.returncode == 0 and bool(stdout.strip())
def _is_package_in_npx_cache(package_spec):
"""Return True when npm's `_npx` cache already contains package_spec."""
package_name = _npx_package_name(package_spec)
if not package_name:
return False
for cache_root in _npm_cache_roots():
npx_root = os.path.join(cache_root, "_npx")
if _npx_cache_contains_package(npx_root, package_name):
return True
return False
def _npx_package_name(package_spec):
"""Strip a version/range suffix from an npm package spec."""
if not package_spec:
return ""
if package_spec.startswith("@"):
parts = package_spec.split("@", 2)
if len(parts) >= 3:
return f"@{parts[1]}"
return package_spec
return package_spec.split("@", 1)[0]
def _npm_cache_roots():
roots = []
configured = os.environ.get("npm_config_cache")
if configured:
roots.append(os.path.expanduser(configured))
roots.append(os.path.join(os.path.expanduser("~"), ".npm"))
local_app_data = os.environ.get("LOCALAPPDATA")
if local_app_data:
roots.append(os.path.join(local_app_data, "npm-cache"))
return list(dict.fromkeys(roots))
def _npx_cache_contains_package(npx_root, package_name):
if not os.path.isdir(npx_root):
return False
package_path = os.path.join("node_modules", *package_name.split("/"), "package.json")
try:
entries = list(os.scandir(npx_root))
except OSError:
return False
for entry in entries:
try:
is_dir = entry.is_dir()
except OSError:
continue
cached_name = _cached_package_name(os.path.join(entry.path, package_path))
if is_dir and cached_name == package_name:
return True
return False
def _cached_package_name(package_json_path):
try:
with open(package_json_path, encoding="utf-8") as fh:
data = json.load(fh)
except (OSError, ValueError):
return ""
return str(data.get("name", "")).strip()
+178 -1
View File
@@ -128,6 +128,17 @@ def validate_caldav_url(raw_url: str) -> str:
return urlunparse(parsed._replace(fragment="")).rstrip("/")
def _event_etag(obj) -> str:
"""Best-effort ETag extraction from python-caldav resources."""
try:
etag = getattr(obj, "etag", None)
if callable(etag):
etag = etag()
return str(etag or "")
except Exception:
return ""
def _stable_cal_id(remote_url: str, owner: str = "", account_id: str = "") -> str:
"""Deterministic local id for a remote CalDAV calendar, scoped to owner
and account so two users or one user with two accounts pointing at
@@ -316,11 +327,12 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
color="#5b8abf",
source="caldav",
account_id=account_id or None,
caldav_base_url=remote_url,
)
db.add(local_cal)
db.commit()
else:
# Refresh display name and stamp account_id if missing.
# Refresh display name and stamp CalDAV metadata if missing.
changed = False
if local_cal.name != display_name:
local_cal.name = display_name
@@ -328,6 +340,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
if account_id and not local_cal.account_id:
local_cal.account_id = account_id
changed = True
if local_cal.caldav_base_url != remote_url:
local_cal.caldav_base_url = remote_url
changed = True
if changed:
db.commit()
result["calendars"] += 1
@@ -395,6 +410,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
existing = _find_existing_event(db, pending, uid_val, local_cal.id)
if existing:
if existing.caldav_sync_pending in {"create", "update"}:
result["events"] += 1
continue
existing.calendar_id = local_cal.id
existing.summary = summary
existing.description = description
@@ -405,6 +423,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
existing.is_utc = row_is_utc
existing.rrule = rrule
existing.origin = "caldav"
existing.remote_href = str(getattr(obj, "url", "") or "") or None
existing.remote_etag = _event_etag(obj) or None
existing.caldav_sync_pending = None
else:
new_ev = CalendarEvent(
uid=uid_val,
@@ -418,6 +439,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
is_utc=row_is_utc,
rrule=rrule,
origin="caldav",
remote_href=str(getattr(obj, "url", "") or "") or None,
remote_etag=_event_etag(obj) or None,
)
db.add(new_ev)
pending[uid_val] = new_ev
@@ -442,6 +465,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
CalendarEvent.origin == "caldav",
CalendarEvent.dtstart >= start,
CalendarEvent.dtstart <= end,
CalendarEvent.remote_href.isnot(None),
CalendarEvent.caldav_sync_pending.is_(None),
~CalendarEvent.uid.in_(seen_uids) if seen_uids else CalendarEvent.uid.isnot(None),
).all()
for ev in stale:
@@ -458,6 +483,92 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
return result
def _event_payload(ev) -> dict:
return {
"uid": ev.uid,
"summary": ev.summary,
"description": ev.description,
"location": ev.location,
"dtstart": ev.dtstart,
"dtend": ev.dtend,
"all_day": ev.all_day,
"is_utc": ev.is_utc,
"rrule": ev.rrule or "",
}
def _load_event_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
from core.database import CalendarCal, CalendarEvent, SessionLocal
db = SessionLocal()
try:
ev = (
db.query(CalendarEvent)
.join(CalendarCal)
.filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
.first()
)
if not ev or not ev.calendar or ev.calendar.source != "caldav":
return None
return ev.calendar.source, ev.calendar.id, _event_payload(ev)
finally:
db.close()
def _load_delete_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal
db = SessionLocal()
try:
tombstone = db.query(CalendarDeletedEvent).filter(
CalendarDeletedEvent.uid == uid,
CalendarDeletedEvent.owner == owner,
).first()
if tombstone:
return "caldav", tombstone.calendar_id, {"uid": uid}
ev = (
db.query(CalendarEvent)
.join(CalendarCal)
.filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
.first()
)
if not ev or not ev.calendar or ev.calendar.source != "caldav":
return None
return ev.calendar.source, ev.calendar.id, {"uid": uid}
finally:
db.close()
def _pending_writeback_uids(owner: str) -> tuple[list[str], list[str]]:
from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal
db = SessionLocal()
try:
rows = (
db.query(CalendarEvent.uid)
.join(CalendarCal)
.filter(
CalendarCal.owner == owner,
CalendarCal.source == "caldav",
CalendarEvent.status != "cancelled",
(
(CalendarEvent.caldav_sync_pending.isnot(None))
| (CalendarEvent.remote_href.is_(None))
),
)
.all()
)
delete_rows = (
db.query(CalendarDeletedEvent.uid)
.filter(CalendarDeletedEvent.owner == owner)
.all()
)
return [row[0] for row in rows], [row[0] for row in delete_rows]
finally:
db.close()
def _load_caldav_accounts(owner: str) -> list:
"""Return the list of CalDAV accounts for *owner*, auto-migrating the legacy
single-account ``caldav`` key to the new ``caldav_accounts`` list on first call.
@@ -533,3 +644,69 @@ async def sync_caldav(owner: str) -> dict:
for err in result.get("errors", []):
totals["errors"].append(f"{label}: {err}")
return totals
async def push_event_create(owner: str, uid: str) -> dict:
loaded = _load_event_for_writeback(owner, uid)
if not loaded:
return {"ok": True, "skipped": True}
source, calendar_id, payload = loaded
from src.caldav_writeback import writeback_event
return await writeback_event(owner, source, calendar_id, payload)
async def push_event_update(owner: str, uid: str) -> dict:
return await push_event_create(owner, uid)
async def push_event_delete(owner: str, uid: str) -> dict:
loaded = _load_delete_for_writeback(owner, uid)
if not loaded:
return {"ok": True, "skipped": True}
source, calendar_id, payload = loaded
from src.caldav_writeback import writeback_event
return await writeback_event(owner, source, calendar_id, payload, delete=True)
async def push_pending_events(owner: str) -> dict:
result = {"events": 0, "errors": []}
uids, delete_uids = _pending_writeback_uids(owner)
for event_uid in uids:
try:
out = await push_event_update(owner, event_uid)
if out.get("ok"):
result["events"] += 1
elif not out.get("skipped"):
result["errors"].append(f"{event_uid}: {str(out.get('error') or out)[:160]}")
except Exception as e:
logger.warning("CalDAV pending push failed for uid=%s: %s", event_uid, e)
result["errors"].append(f"{event_uid}: {str(e)[:160]}")
for event_uid in delete_uids:
try:
out = await push_event_delete(owner, event_uid)
if out.get("ok"):
result["events"] += 1
elif not out.get("skipped"):
result["errors"].append(f"{event_uid}: {str(out.get('error') or out)[:160]}")
except Exception as e:
logger.warning("CalDAV pending delete failed for uid=%s: %s", event_uid, e)
result["errors"].append(f"{event_uid}: {str(e)[:160]}")
return result
async def sync_caldav_direction(owner: str, direction: str = "pull") -> dict:
direction = (direction or "pull").strip().lower()
if direction == "pull":
return await sync_caldav(owner)
if direction == "push":
return await push_pending_events(owner)
if direction == "both":
pushed = await push_pending_events(owner)
pulled = await sync_caldav(owner)
return {"push": pushed, "pull": pulled}
return {
"calendars": 0,
"events": 0,
"deleted": 0,
"errors": [f"Unsupported CalDAV sync direction: {direction}"],
}
+92 -6
View File
@@ -89,6 +89,23 @@ def find_remote_calendar(calendars, local_cal_id: str, owner: str = "", account_
return None
def _resource_href(obj) -> str:
try:
return str(getattr(obj, "url", "") or "")
except Exception:
return ""
def _resource_etag(obj) -> str:
try:
etag = getattr(obj, "etag", None)
if callable(etag):
etag = etag()
return str(etag or "")
except Exception:
return ""
def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
owner: str = "", account_id: str = "") -> dict:
"""Create/update (or delete) ``ev`` on the matching remote calendar.
@@ -105,6 +122,7 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
remote = find_remote_calendar(calendars, local_cal_id, owner=owner, account_id=account_id)
if remote is None:
return {"ok": False, "error": "remote calendar not found"}
remote_url = str(getattr(remote, "url", "") or "")
try:
existing = remote.event_by_uid(uid)
@@ -113,17 +131,34 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
if delete:
if existing is None:
return {"ok": True, "note": "already absent on remote"}
return {"ok": True, "note": "already absent on remote", "calendar_url": remote_url}
existing.delete()
return {"ok": True}
return {
"ok": True,
"calendar_url": remote_url,
"remote_href": _resource_href(existing),
"remote_etag": _resource_etag(existing),
}
ical = build_event_ical(ev)
if existing is not None:
existing.data = ical
existing.save()
return {"ok": True, "updated": True}
remote.save_event(ical)
return {"ok": True, "created": True}
return {
"ok": True,
"updated": True,
"calendar_url": remote_url,
"remote_href": _resource_href(existing),
"remote_etag": _resource_etag(existing),
}
created = remote.save_event(ical)
return {
"ok": True,
"created": True,
"calendar_url": remote_url,
"remote_href": _resource_href(created),
"remote_etag": _resource_etag(created),
}
def _discover_calendars(client):
@@ -154,6 +189,54 @@ def _writeback_blocking(local_cal_id, ev, delete, url, username, password,
owner=owner, account_id=account_id)
def _persist_writeback_result(owner: str, calendar_id: str, uid: str, result: dict, *, delete: bool) -> None:
from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal
if not uid or not isinstance(result, dict):
return
db = SessionLocal()
try:
calendar = db.query(CalendarCal).filter(
CalendarCal.id == calendar_id,
CalendarCal.owner == owner,
).first()
if calendar and result.get("calendar_url"):
calendar.caldav_base_url = result.get("calendar_url")
if delete:
tombstone = db.query(CalendarDeletedEvent).filter(
CalendarDeletedEvent.uid == uid,
CalendarDeletedEvent.owner == owner,
).first()
if result.get("ok"):
if tombstone:
db.delete(tombstone)
elif tombstone:
tombstone.last_error = str(result.get("error") or result)[:500]
db.commit()
return
event = (
db.query(CalendarEvent)
.join(CalendarCal)
.filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
.first()
)
if event and result.get("ok"):
if result.get("remote_href"):
event.remote_href = result.get("remote_href")
if result.get("remote_etag"):
event.remote_etag = result.get("remote_etag")
event.caldav_sync_pending = None
db.commit()
except Exception:
db.rollback()
logger.exception("CalDAV write-back metadata persistence failed")
finally:
db.close()
async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
ev: dict, *, delete: bool = False) -> dict:
"""Best-effort push of a local change to the remote CalDAV server.
@@ -204,9 +287,12 @@ async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
result = await asyncio.to_thread(
_writeback_blocking, calendar_id, ev, delete, url, user, pw, owner, acc_id
)
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
if not result.get("ok"):
logger.warning("CalDAV write-back did not apply: %s", result.get("error") or result)
return result
except Exception as e:
logger.exception("CalDAV write-back raised")
return {"ok": False, "error": str(e)[:200]}
result = {"ok": False, "error": str(e)[:200]}
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
return result
+13 -9
View File
@@ -175,6 +175,19 @@ class ChatProcessor:
Returns:
Tuple of (preface messages, rag_sources list)
Note on KV-cache friendliness: the ``system``-role messages assembled
here are later concatenated into a single system message and sent as
the very first thing in the payload (see ``llm_core``'s "consolidate
system messages" step). Local OpenAI-compatible backends (llama.cpp /
LM Studio) key their KV cache off the byte-identical token prefix, so
*anything* that changes turn-to-turn timestamps, retrieved snippets,
per-turn counts must NOT be folded into a system message here. Such
content belongs in a separate ``user``/context message appended near
the end of the array (see ``current_datetime_context_message`` and
``untrusted_context_message`` callers in ``build_chat_context``),
which keeps the static system prefix byte-identical across turns of
the same session and lets the backend reuse its cached prefix.
"""
preface = []
rag_sources = []
@@ -185,15 +198,6 @@ class ChatProcessor:
"role": "system",
"content": preset_system_prompt
})
if not agent_mode:
try:
from src.user_time import current_datetime_prompt
preface.append({
"role": "system",
"content": current_datetime_prompt(),
})
except Exception:
logger.debug("Failed to add current date/time context", exc_info=True)
preface.append({
"role": "system",
"content": UNTRUSTED_CONTEXT_POLICY,
+27 -7
View File
@@ -31,16 +31,22 @@ def compute_input_token_budget(
Args:
configured: the value read from settings (may be the default).
context_length: the model's discovered context window (0/unknown if none).
explicit: True if the user explicitly set ``agent_input_token_budget``.
context_length: the model's discovered context window. Pass 0 when the
window is unknown / only a bare fallback auto-scaling then stays
conservative instead of trusting an unproven window (review on #4122).
explicit: True if the user set a NON-default budget. The default value is
the "auto" sentinel (scale to the window); any other value is an
explicit cap. (A deliberately-chosen default can't be distinguished
from a materialized default by value, so the default reads as auto.)
Rules:
- Explicit user budget is honoured exactly, only clamped to the model's
window when that window is known (never send more than the model holds).
- Otherwise (default), scale to ``headroom`` of the context window, capped
at ``hard_max`` so long-context models use their capacity.
- When the window is unknown, fall back to the configured/default value
(preserving the previous behaviour).
window when that window is known (the user's deliberate choice wins;
``hard_max`` is an auto-budget ceiling only see #1230).
- Otherwise (auto), scale to ``headroom`` of the context window, capped at
``hard_max`` so long-context models use their capacity.
- When the window is unknown (context_length <= 0), use the conservative
``default`` budget and do NOT scale off the fallback.
"""
configured = int(configured or 0)
context_length = int(context_length or 0)
@@ -53,3 +59,17 @@ def compute_input_token_budget(
return max(1, min(scaled, hard_max))
return configured if configured > 0 else default
def budget_is_explicit(configured: int, *, default: int = DEFAULT_BUDGET) -> bool:
"""Whether a configured agent_input_token_budget is a deliberate explicit cap.
The default value is the "auto" sentinel (scale to the model's window), so only
a NON-default positive value counts as explicit. This keys off the VALUE, not
settings *presence* the settings-save path materializes every default into
settings.json, so a persisted default must still read as auto (the regression
#4121 / #1230 are about). Centralised here so the materialized-default contract
is unit-testable and can't silently regress to a presence check.
"""
configured = int(configured or 0)
return configured > 0 and configured != default
+13 -5
View File
@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
protected_tokens = estimate_tokens(protected_msgs)
budget -= protected_tokens
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
essential_system = system_msgs[:1] if system_msgs else []
extra_system = system_msgs[1:]
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
# Exception: a research-spinoff primer (the seeded report that grounds a
# "Discuss" chat) must never be dropped — it is the conversation's whole
# knowledge base. Treat any system message carrying research_spinoff_from
# metadata as essential alongside the leading system prompt.
def _is_research_primer(m):
return bool((m.get("metadata") or {}).get("research_spinoff_from"))
_primers = [m for m in system_msgs if _is_research_primer(m)]
_non_primer = [m for m in system_msgs if not _is_research_primer(m)]
essential_system = (_non_primer[:1] if _non_primer else []) + _primers
extra_system = _non_primer[1:]
# Try dropping extra system messages one by one (from the end)
trimmed = essential_system + convo_msgs
@@ -438,8 +446,8 @@ def _update_session_history(session, split_point: int, summary: str,
)
new_history = system_prefix + [summary_msg] + recent_history
try:
from core import models as _core_models
manager = getattr(_core_models, "_session_manager", None)
from core.models import get_session_manager_instance
manager = get_session_manager_instance()
except Exception:
manager = None
if manager and getattr(session, "id", None):
+22 -3
View File
@@ -136,7 +136,8 @@ async def _tick() -> None:
return
try:
state = json.loads(state_path.read_text(encoding="utf-8"))
except Exception:
except Exception as e:
logger.warning("cookbook_serve_lifecycle: state file unreadable (%s), skipping tick", e)
return
tasks = state.get("tasks") or []
now_ms = int(time.time() * 1000)
@@ -178,8 +179,26 @@ async def _tick() -> None:
if stopped_any:
try:
from core.atomic_io import atomic_write_json
state["tasks"] = tasks
atomic_write_json(state_path, state)
# Re-read the state file so concurrent UI writes (task adds,
# status flips, config edits) are not silently overwritten.
# Apply only our stop mutations to the fresh snapshot.
try:
fresh = json.loads(state_path.read_text(encoding="utf-8"))
fresh_tasks = fresh.get("tasks") or []
except Exception:
fresh = state
fresh_tasks = tasks
stopped_sids = {sid for sid, _, _ in to_stop}
for ft in fresh_tasks:
if not isinstance(ft, dict):
continue
ft_sid = ft.get("sessionId") or ft.get("id")
if ft_sid in stopped_sids:
ft["status"] = "stopped"
ft["_scheduledStopAtMs"] = None
ft["_lastStatusFlipAt"] = now_ms
fresh["tasks"] = fresh_tasks
atomic_write_json(state_path, fresh)
except Exception as e:
logger.warning(f"cookbook_serve_lifecycle: state write failed: {e}")
+5
View File
@@ -232,6 +232,7 @@ class DeepResearcher:
self._start_time: float = 0
self.queries_used: Set[str] = set()
self.urls_fetched: Set[str] = set()
self.analyzed_urls: List[Dict[str, str]] = []
self.round_count: int = 0
# Track which search providers actually returned results during the
# run, in arrival order — surfaced in the visual report so users can
@@ -525,6 +526,10 @@ class DeepResearcher:
if url and url not in self.urls_fetched:
urls_to_fetch.append(r)
self.urls_fetched.add(url)
self.analyzed_urls.append({
"url": url,
"title": r.get("title", "") or url,
})
if len(urls_to_fetch) >= self.max_urls_per_round * len(queries):
break
+11 -2
View File
@@ -196,13 +196,22 @@ def _get_or_reset_collection(chroma_client, name: str, metadata: Dict[str, Any],
try:
chroma_client.delete_collection(name)
restored = chroma_client.get_or_create_collection(name=name, metadata=current)
old_embeddings = preserved.get("embeddings") or []
if ids and docs and old_embeddings:
# chromadb returns embeddings as a numpy ndarray, whose truth value
# is ambiguous — `preserved.get("embeddings") or []` and a bare
# `if ... and old_embeddings:` both raise ValueError, which aborts
# the restore and loses the rows the reset was supposed to keep.
# Use explicit None/len checks instead.
old_embeddings = preserved.get("embeddings")
if old_embeddings is None:
old_embeddings = []
if ids and docs and len(old_embeddings):
for start in range(0, len(ids), 100):
batch_ids = ids[start:start + 100]
batch_docs = docs[start:start + 100]
batch_metas = metas[start:start + 100]
batch_embeddings = old_embeddings[start:start + 100]
if hasattr(batch_embeddings, "tolist"):
batch_embeddings = batch_embeddings.tolist()
if len(batch_metas) < len(batch_ids):
batch_metas += [{}] * (len(batch_ids) - len(batch_metas))
restored.add(
+27 -14
View File
@@ -12,7 +12,7 @@ from typing import Optional, Tuple, Dict
from urllib.parse import urlparse, urlunparse
from core.database import SessionLocal, ModelEndpoint
from src.llm_core import _detect_provider, _host_match, _ollama_api_root
from src.llm_core import _detect_provider, _host_match, _is_kimi_code_url, KIMI_CODE_USER_AGENT, _ollama_api_root
logger = logging.getLogger(__name__)
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
def build_models_url(base: str) -> Optional[str]:
"""Return the provider-specific model-list endpoint URL for a base."""
"""Return the provider-specific model-list endpoint URL for a base.
For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
When the user-supplied base has no path e.g. ``http://localhost:1234``
we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
segment only when the path is empty, leaving any explicit non-empty path
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
their semantics).
"""
base = normalize_base(resolve_url(base))
provider = _detect_provider(base)
if provider == "anthropic":
@@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]:
return _ollama_api_root(base) + "/tags"
if provider == "chatgpt-subscription":
return None
# Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
# when the user omitted a path entirely. If a non-empty path is already
# present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
# /models suffix is appended as-is and the caller's prefix is preserved.
if not urlparse(base).path:
base = base + "/v1"
return base + "/models"
@@ -215,6 +230,8 @@ def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
if provider == "openrouter":
headers.setdefault("HTTP-Referer", "https://github.com/pewdiepie-archdaemon/odysseus")
headers.setdefault("X-OpenRouter-Title", "Odysseus")
if _is_kimi_code_url(base):
headers.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
return headers
@@ -250,27 +267,23 @@ def resolve_endpoint(
ep_id = _stg(f"{setting_prefix}_endpoint_id")
model = _stg(f"{setting_prefix}_model")
# If the specific endpoint is not configured, but the caller provided a
# Fall back to utility model for task/research/auto-naming if not specifically configured.
if not ep_id and setting_prefix not in ("utility", "default"):
ep_id = _stg("utility_endpoint_id")
model = _stg("utility_model")
# If the endpoint is STILL not configured, but the caller provided a
# valid fallback (e.g. the active session model), use that immediately.
# This prevents background tasks from jumping to the global default_model
# when the user is mid-conversation with a different model.
if not ep_id and fallback_url and fallback_model:
return fallback_url, fallback_model, fallback_headers
# Unset Utility means "same as Default Chat Model".
if setting_prefix == "utility" and not ep_id:
# Unset Utility (or anything else that didn't have a fallback) means "same as Default Chat Model".
if not ep_id:
ep_id = _stg("default_endpoint_id")
model = _stg("default_model")
# Fall back to utility model for task/research/auto-naming if not specifically configured.
# If Utility itself is unset, the block above makes that resolve to Default Chat.
if not ep_id and setting_prefix != "utility":
ep_id = _stg("utility_endpoint_id")
model = _stg("utility_model")
if not ep_id:
ep_id = _stg("default_endpoint_id")
model = _stg("default_model")
if not ep_id:
return fallback_url, fallback_model, fallback_headers
+79 -5
View File
@@ -6,6 +6,7 @@ import re
from typing import Dict, List, Optional, Any
import httpx
from fastapi import HTTPException
from core.atomic_io import atomic_write_json
from core.platform_compat import safe_chmod
@@ -258,6 +259,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
integration.setdefault("name", "")
integration.setdefault("base_url", "")
if not isinstance(integration.get("name"), str) or not integration["name"].strip():
raise HTTPException(400, "Integration name is required")
if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
raise HTTPException(400, "Integration base URL is required")
integrations = load_integrations()
integrations.append(integration)
save_integrations(integrations)
@@ -266,6 +272,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Update fields on an existing integration. Returns updated integration or None."""
if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
raise HTTPException(400, "Integration name is required")
if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
raise HTTPException(400, "Integration base URL is required")
integrations = load_integrations()
for item in integrations:
if item.get("id") == integration_id:
@@ -411,17 +422,80 @@ async def execute_api_call(
if "application/json" in content_type:
try:
data = response.json()
formatted = json.dumps(data, indent=2, ensure_ascii=False)
full = json.dumps(data, indent=2, ensure_ascii=False)
if len(full) > 12000:
if isinstance(data, list):
# Binary-search for the largest prefix such that the
# final array (prefix + sentinel) fits within the limit.
# Pre-compute the sentinel so we know its serialized size.
sentinel_placeholder = {
"_truncated": True,
"total_items": len(data),
"shown_items": 0,
}
# Overhead: the sentinel appears as an extra array element.
# Add a conservative padding for the separating comma,
# newline, and indentation characters (~6 chars).
sentinel_overhead = len(
json.dumps(sentinel_placeholder, indent=2, ensure_ascii=False)
) + 6
budget = 12000 - sentinel_overhead
lo, hi = 0, len(data)
while lo < hi:
mid = (lo + hi + 1) // 2
candidate = json.dumps(
data[:mid], indent=2, ensure_ascii=False
)
if len(candidate) < budget:
lo = mid
else:
hi = mid - 1
sentinel = {
"_truncated": True,
"total_items": len(data),
"shown_items": lo,
}
formatted = json.dumps(
data[:lo] + [sentinel], indent=2, ensure_ascii=False
)
elif isinstance(data, dict):
# Truncate dict entries until the result fits, then add
# the _truncated marker. Walk keys in insertion order.
DICT_LIMIT = 12000
kept: dict = {}
for k, v in data.items():
candidate = json.dumps(
{**kept, k: v, "_truncated": True},
indent=2,
ensure_ascii=False,
)
if len(candidate) <= DICT_LIMIT:
kept[k] = v
else:
break
formatted = json.dumps(
{**kept, "_truncated": True}, indent=2, ensure_ascii=False
)
else:
total = len(full)
formatted = full[:12000] + f"\n... (truncated, {total} chars total)"
else:
formatted = full
except (json.JSONDecodeError, ValueError):
formatted = response.text
if len(formatted) > 12000:
total = len(formatted)
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
elif "text/html" in content_type:
formatted = _strip_html_tags(response.text)
if len(formatted) > 12000:
total = len(formatted)
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
else:
formatted = response.text
# Truncate
if len(formatted) > 12000:
formatted = formatted[:12000] + "\n... (truncated)"
if len(formatted) > 12000:
total = len(formatted)
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
output = f"HTTP {status}\n{formatted}"
+314 -17
View File
@@ -7,6 +7,7 @@ import logging
import hashlib
import threading
import re
import os
from fastapi import HTTPException
from typing import Optional, Dict, List, Tuple
from src.model_context import get_context_length, DEFAULT_CONTEXT
@@ -22,6 +23,24 @@ class LLMConfig:
MAX_RETRIES = 3
RETRY_DELAY = 0.5
STREAM_TIMEOUT = 300
# TCP+TLS connect budget for a SINGLE attempt. The old hard-coded 3.0s
# assumed LAN/Tailscale peers ('SYN in <100ms'); it is too tight for public
# cloud endpoints (offshore APIs take ~0.5-1.5s cold, with jitter), so a
# brief blip on the first connect of an idle chat surfaced as a 503 on the
# streaming path (which, unlike llm_call, does not retry the connect). A
# genuinely dead upstream stays bounded by the dead-host cooldown. Override
# with env LLM_CONNECT_TIMEOUT (seconds).
CONNECT_TIMEOUT = float(os.getenv('LLM_CONNECT_TIMEOUT', '10') or '10')
def _call_timeout(read_timeout) -> httpx.Timeout:
"""Per-request timeout for non-streaming LLM calls (connect from config)."""
return httpx.Timeout(connect=LLMConfig.CONNECT_TIMEOUT, read=float(read_timeout), write=10.0, pool=5.0)
def _stream_timeout(read_timeout) -> httpx.Timeout:
"""Per-request timeout for streaming LLM calls (connect from config)."""
return httpx.Timeout(connect=LLMConfig.CONNECT_TIMEOUT, read=float(read_timeout), write=30.0, pool=5.0)
# Cache for LLM responses
@@ -276,6 +295,24 @@ def _is_ollama_native_url(url: str) -> bool:
return local_ollama_host and (path == "" or path == "/api" or path.startswith("/api/"))
def _is_ollama_openai_compat_url(url: str) -> bool:
"""Return True for local Ollama's OpenAI-compatible /v1 surface.
Mirrors the host detection used by ``_is_ollama_native_url`` so that the
two helpers stay in lockstep: a localhost Ollama on a non-default port
(custom ``OLLAMA_HOST``, reverse proxy, container port remap) is treated
the same way here as it is on the native ``/api`` path.
"""
try:
parsed = urlparse(url or "")
except Exception:
return False
host = parsed.hostname or ""
path = (parsed.path or "").rstrip("/")
local_ollama_host = host in {"localhost", "127.0.0.1", "0.0.0.0", "::1"} or parsed.port == 11434
return local_ollama_host and (path == "/v1" or path.startswith("/v1/"))
def _ollama_api_root(url: str) -> str:
"""Return a native Ollama API root such as https://ollama.com/api."""
url = (url or "").strip().rstrip("/")
@@ -405,6 +442,146 @@ def _host_match(url: str, *domains: str) -> bool:
return any(host == d or host.endswith("." + d) for d in domains)
# Kimi Code subscription keys (api.kimi.com/coding/v1) require a whitelisted
# coding-agent User-Agent; otherwise the API returns 403 access_terminated_error.
# Tried in order; first success is cached per base URL for later requests.
KIMI_CODE_USER_AGENTS: tuple[str, ...] = (
"claude-code/0.1.0",
"claude-code/1.0.0",
"KimiCLI/1.0",
"Kilo-Code/1.0",
"Roo-Code/1.0",
"Cursor/1.0",
)
KIMI_CODE_USER_AGENT = KIMI_CODE_USER_AGENTS[0]
_kimi_code_ua_cache: dict[str, str] = {}
def _is_kimi_code_url(url: str) -> bool:
if not url or not _host_match(url, "kimi.com"):
return False
try:
return "/coding" in (urlparse(url).path or "")
except Exception:
return False
def _kimi_code_base_key(url: str) -> str:
"""Normalize a Kimi Code chat/models URL to its OpenAI base (.../coding/v1)."""
parsed = urlparse(url)
path = (parsed.path or "").rstrip("/")
for suffix in ("/chat/completions", "/models", "/completions"):
if path.endswith(suffix):
path = path[: -len(suffix)]
path = path.rstrip("/") or "/coding/v1"
return f"{parsed.scheme}://{parsed.netloc}{path}"
def _is_kimi_code_access_denied(status: int, body: bytes | str) -> bool:
if status != 403:
return False
text = body.decode("utf-8", errors="replace") if isinstance(body, bytes) else (body or "")
lower = text.lower()
return (
"access_terminated_error" in lower
or "coding agents" in lower
or "only available for coding" in lower
)
def _kimi_code_ua_candidates(url: str) -> list[str]:
if not _is_kimi_code_url(url):
return []
base_key = _kimi_code_base_key(url)
cached = _kimi_code_ua_cache.get(base_key)
if cached:
return [cached] + [ua for ua in KIMI_CODE_USER_AGENTS if ua != cached]
return list(KIMI_CODE_USER_AGENTS)
def _remember_kimi_code_user_agent(url: str, user_agent: str) -> None:
_kimi_code_ua_cache[_kimi_code_base_key(url)] = user_agent
def apply_kimi_code_headers(headers: Optional[Dict], url: str) -> Dict[str, str]:
"""Pick a Kimi Code User-Agent (cached probe when possible)."""
h = dict(headers or {})
if not _is_kimi_code_url(url):
return h
base_key = _kimi_code_base_key(url)
cached = _kimi_code_ua_cache.get(base_key)
if cached:
h["User-Agent"] = cached
return h
models_url = base_key.rstrip("/") + "/models"
from src.tls_overrides import llm_verify
for ua in KIMI_CODE_USER_AGENTS:
trial = dict(h)
trial["User-Agent"] = ua
try:
r = httpx.get(models_url, headers=trial, timeout=8, verify=llm_verify())
except Exception:
continue
if _is_kimi_code_access_denied(r.status_code, r.content):
logger.debug("Kimi Code rejected User-Agent %s (403), trying next", ua)
continue
if r.status_code < 400:
_remember_kimi_code_user_agent(url, ua)
h["User-Agent"] = ua
return h
break
h.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
return h
def httpx_get_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
h = apply_kimi_code_headers(headers, url)
if not _is_kimi_code_url(url):
return httpx.get(url, headers=h, **kwargs)
last = None
for ua in _kimi_code_ua_candidates(url):
trial = dict(h)
trial["User-Agent"] = ua
last = httpx.get(url, headers=trial, **kwargs)
if not _is_kimi_code_access_denied(last.status_code, last.content):
if last.status_code < 400:
_remember_kimi_code_user_agent(url, ua)
return last
return last
def httpx_post_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
h = apply_kimi_code_headers(headers, url)
if not _is_kimi_code_url(url):
return httpx.post(url, headers=h, **kwargs)
last = None
for ua in _kimi_code_ua_candidates(url):
trial = dict(h)
trial["User-Agent"] = ua
last = httpx.post(url, headers=trial, **kwargs)
if not _is_kimi_code_access_denied(last.status_code, last.content):
if last.status_code < 400:
_remember_kimi_code_user_agent(url, ua)
return last
return last
async def httpx_post_kimi_aware_async(client, url: str, headers: Optional[Dict], **kwargs):
h = apply_kimi_code_headers(headers, url)
if not _is_kimi_code_url(url):
return await client.post(url, headers=h, **kwargs)
last = None
for ua in _kimi_code_ua_candidates(url):
trial = dict(h)
trial["User-Agent"] = ua
last = await client.post(url, headers=trial, **kwargs)
if not _is_kimi_code_access_denied(last.status_code, last.content):
if last.status_code < 400:
_remember_kimi_code_user_agent(url, ua)
return last
return last
def _detect_provider(url: str) -> str:
"""Detect the API provider from a configured endpoint URL.
@@ -426,6 +603,10 @@ def _detect_provider(url: str) -> str:
return "openrouter"
if _host_match(url, "groq.com"):
return "groq"
if _host_match(url, "nvidia.com"):
return "nvidia"
if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
return "moonshot"
from src.chatgpt_subscription import is_chatgpt_subscription_base
if is_chatgpt_subscription_base(url):
return "chatgpt-subscription"
@@ -435,6 +616,53 @@ def _detect_provider(url: str) -> str:
return "openai"
def _is_self_hosted_openai_compatible(url: str) -> bool:
"""True for custom/local OpenAI-compatible servers (llama.cpp, LM Studio,
vLLM, text-generation-webui, etc.) as opposed to cloud APIs.
Used to gate llama.cpp-server-specific payload extras (``session_id``,
``cache_prompt``) used for KV-cache slot affinity (issue #2927). Strict
cloud providers reject unrecognized top-level fields (api.openai.com
returns 400, Mistral returns 422 "extra_forbidden", issue #3793), and any
unknown OpenAI-compatible host used to be treated as self-hosted, so those
fields leaked to every strict provider added as a custom endpoint.
A server only counts as self-hosted when it also resolves as local:
loopback/private/tailscale host, or the endpoint explicitly configured
with kind "local". A self-hosted server exposed via a public hostname
loses the affinity hint unless its endpoint kind is set to "local" -
a lost perf hint, versus a hard 4xx on every request the other way.
"""
if _detect_provider(url) != "openai" or _host_match(url, "openai.com"):
return False
from src.model_context import is_local_endpoint
return is_local_endpoint(url)
def _apply_local_cache_affinity(payload: Dict, url: str, session_id: Optional[str]) -> None:
"""Add llama.cpp-server slot-affinity hints to an outgoing payload, in place.
As diagnosed in issue #2927, llama.cpp assigns requests to processing
slots via LRU when no stable identifier is present ("session_id=<empty>
server-selected (LCP/LRU)"), which means consecutive turns of the same
chat can land on different slots and lose their cached prefix entirely.
Sending a stable ``session_id`` (derived from the Odysseus session) lets
the server keep routing the same conversation to the same slot, and
``cache_prompt: true`` asks it to retain/reuse the prefix it already has.
Both fields are llama.cpp / LM Studio extensions to the OpenAI schema; we
only set them for self-hosted OpenAI-compatible endpoints (never
api.openai.com or other cloud providers, which reject unrecognized
top-level request fields).
"""
if not session_id:
return
if not _is_self_hosted_openai_compatible(url):
return
payload.setdefault("session_id", str(session_id))
payload.setdefault("cache_prompt", True)
def _provider_headers(provider: str, headers: Optional[Dict] = None) -> Dict[str, str]:
h = {"Content-Type": "application/json"}
if isinstance(headers, dict):
@@ -471,9 +699,16 @@ def _provider_label(url: str) -> str:
if is_copilot_base(url): return "GitHub Copilot"
if _host_match(url, "mistral.ai"): return "Mistral"
if _host_match(url, "deepseek.com"): return "DeepSeek"
if _host_match(url, "nvidia.com"): return "NVIDIA"
if _host_match(url, "googleapis.com"): return "Google"
if _host_match(url, "together.xyz", "together.ai"): return "Together"
if _host_match(url, "fireworks.ai"): return "Fireworks"
if _host_match(url, "kimi.com"):
try:
if "/coding" in (urlparse(url).path or ""):
return "Kimi Code"
except Exception:
pass
if _is_ollama_native_url(url): return "Ollama"
try:
host = (urlparse(url).hostname or "").lower()
@@ -542,8 +777,9 @@ def _build_chatgpt_responses_payload(
}
if not _restricts_temperature(model):
payload["temperature"] = temperature
if max_tokens and max_tokens > 0:
payload["max_output_tokens"] = max_tokens
# ChatGPT Subscription Codex API does not support max_output_tokens —
# passing it returns HTTP 400 "Unsupported parameter: max_output_tokens".
# Do not include it in the payload.
return payload
@@ -613,7 +849,7 @@ def _uses_max_completion_tokens(model: str) -> bool:
# perfectly good model as failing. For these models we omit the field and let
# the API use its required default. (gpt-4.5 is intentionally excluded — it is
# not a reasoning model and accepts temperature normally.)
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5")
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5", "kimi-for-coding")
def _restricts_temperature(model: str) -> bool:
"""Check if a model rejects any non-default temperature."""
@@ -622,6 +858,49 @@ def _restricts_temperature(model: str) -> bool:
m = model.lower()
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
# control, so omit temperature and let Moonshot use its default thinking mode.
# Keep the gate provider-specific: self-hosted Kimi deployments may accept
# custom sampling values, and older Moonshot models have different defaults.
def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
"""Check if the official Moonshot API fixes temperature for this model."""
if provider != "moonshot" or not isinstance(model, str):
return False
model_id = model.lower().rsplit("/", 1)[-1]
return bool(re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", model_id))
def _omit_temperature(provider: str, model: str) -> bool:
"""Check if a request should use the provider's default temperature."""
return _restricts_temperature(model) or _moonshot_rejects_custom_temperature(
provider, model
)
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
# Sonnet/Haiku) still accept temperature in [0.0, 1.0], so the omission must be
# version-gated rather than applied to all `claude-*` models.
def _anthropic_rejects_temperature(model: str) -> bool:
"""Check if a native-Anthropic model rejects the temperature field (Opus 4.7+)."""
if not isinstance(model, str) or not model:
return False
# `(?<![a-z])` anchors "opus" to a word boundary so a substring match like
# `oct-opus`/`octopus-4-8` can't be read as Opus (it would otherwise strip
# temperature). Cap the minor at 1-2 digits and forbid a trailing digit so a
# dated id like `claude-opus-4-20250514` (Opus 4.0) parses as major-only (no
# minor match, kept) instead of reading the date `20250514` as a giant minor
# that would falsely test >= 4.7. Dated 4.7+ snapshots (`claude-opus-4-7-
# 20260201`) keep their explicit minor and are still matched.
match = re.search(r"(?<![a-z])opus[-_]?(\d+)[-_.](\d{1,2})(?!\d)", model.lower())
if not match:
return False
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
@@ -725,8 +1004,11 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa
"model": model,
"messages": chat_messages,
"max_tokens": max_tokens if max_tokens and max_tokens > 0 else 4096,
"temperature": temperature,
}
# Opus 4.7+ removed the sampling parameters — sending `temperature` (even 0.0)
# returns HTTP 400. Omit it for those models; older Claude models still take it.
if not _anthropic_rejects_temperature(model):
payload["temperature"] = temperature
if system_parts:
system_text = "\n\n".join(system_parts)
# Send `system` as a structured text block so we can attach a prompt-cache
@@ -810,7 +1092,7 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
(content=None, since Gemini/Ollama reject tool_calls alongside ""). Dropping
it leaves the tool result dangling and breaks the next round.
"""
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call"}
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call", "reasoning_content"}
cleaned = []
for msg in messages or []:
if not isinstance(msg, dict):
@@ -1045,7 +1327,7 @@ def list_model_ids(
from src.endpoint_resolver import build_models_url
models_url = build_models_url(base_chat_url)
r = httpx.get(models_url, headers=h, timeout=timeout)
r = httpx_get_kimi_aware(models_url, h, timeout=timeout)
r.raise_for_status()
data = r.json()
model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
@@ -1146,14 +1428,14 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
"messages": messages_copy,
"temperature": temperature,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if max_tokens and max_tokens > 0:
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
payload[tok_key] = max_tokens
try:
note_model_activity(target_url, model)
r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout)
except Exception as e:
raise HTTPException(502, f"POST {target_url} failed: {e}")
if not r.is_success:
@@ -1247,7 +1529,8 @@ async def llm_call_async(
headers: Optional[Dict] = None,
timeout: int = LLMConfig.STREAM_TIMEOUT,
max_retries: int = LLMConfig.MAX_RETRIES,
prompt_type: Optional[str] = None
prompt_type: Optional[str] = None,
session_id: Optional[str] = None,
) -> str:
"""Asynchronous LLM call using httpx with connection pooling, timeout, retry logic, and performance logging."""
provider = _detect_provider(url)
@@ -1339,16 +1622,20 @@ async def llm_call_async(
"messages": messages_copy,
"temperature": temperature,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if max_tokens and max_tokens > 0:
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
payload[tok_key] = max_tokens
# Suppress thinking for qwen3/gemma4 on Ollama /v1 — same as stream_llm.
if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
payload["think"] = False
_apply_local_cache_affinity(payload, url, session_id)
if _is_host_dead(target_url):
raise HTTPException(503, f"Upstream {_host_key(target_url)} marked unreachable (cooldown active)")
call_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=10.0, pool=5.0)
call_timeout = _call_timeout(timeout)
attempt = 0
while attempt < max_retries:
attempt += 1
@@ -1356,7 +1643,7 @@ async def llm_call_async(
try:
note_model_activity(target_url, model)
client = _get_http_client()
r = await client.post(target_url, headers=h, json=payload, timeout=call_timeout)
r = await httpx_post_kimi_aware_async(client, target_url, h, json=payload, timeout=call_timeout)
duration = time.time() - start
if not r.is_success:
friendly = _format_upstream_error(r.status_code, r.text, target_url)
@@ -1401,7 +1688,7 @@ async def llm_call_async(
async def stream_llm(url: str, model: str, messages: List[Dict], temperature: float = LLMConfig.DEFAULT_TEMPERATURE,
max_tokens: int = LLMConfig.DEFAULT_MAX_TOKENS, headers: Optional[Dict] = None,
timeout: int = LLMConfig.STREAM_TIMEOUT, prompt_type: Optional[str] = None,
tools: Optional[List[Dict]] = None):
tools: Optional[List[Dict]] = None, session_id: Optional[str] = None):
"""Stream LLM responses with improved error handling.
Yields SSE chunks:
@@ -1452,7 +1739,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
"temperature": temperature,
"stream": True,
}
if _restricts_temperature(model):
if _omit_temperature(provider, model):
payload.pop("temperature", None)
if provider not in {"openrouter", "groq"}:
payload["stream_options"] = {"include_usage": True}
@@ -1461,14 +1748,23 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
payload[tok_key] = max_tokens
if tools:
payload["tools"] = tools
# For Ollama's OpenAI-compat /v1 endpoint with thinking models (qwen3,
# gemma4, etc.), suppress thinking so tool calls aren't swallowed inside
# <think> blocks. Ollama /v1 accepts "think": false as a top-level param.
if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
payload["think"] = False
_apply_local_cache_affinity(payload, url, session_id)
h = _provider_headers(provider, headers)
if provider == "copilot":
from src.copilot import apply_request_headers
apply_request_headers(h, messages_copy)
# Short connect timeout: a reachable peer answers SYN in <100ms even on
# Tailscale. 3s is plenty; 30s let one dead upstream wedge the UI.
stream_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=30.0, pool=5.0)
# Connect budget from LLMConfig.CONNECT_TIMEOUT (env LLM_CONNECT_TIMEOUT).
# The dead-host cooldown still bounds a genuinely unreachable upstream, so a
# wider connect budget only affects first contact and stops a brief cold
# connect blip (offshore/public endpoints) surfacing as a 503 on this stream
# path, which -- unlike llm_call -- does not retry the connect.
stream_timeout = _stream_timeout(timeout)
if _is_host_dead(target_url):
yield f'event: error\ndata: {json.dumps({"error": f"Upstream {_host_key(target_url)} unreachable (cooldown active)", "status": 503})}\n\n'
@@ -1744,6 +2040,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
events.append(_stream_delta_event(part))
return events
h = apply_kimi_code_headers(h, target_url)
try:
client = _get_http_client()
async with client.stream('POST', target_url, json=payload, headers=h, timeout=stream_timeout) as r:
+82 -34
View File
@@ -5,6 +5,7 @@ Query and cache model context window sizes from OpenAI-compatible APIs.
Provides token estimation for context usage tracking.
"""
import ipaddress
import logging
import sys
from typing import Dict, List, Optional, Tuple
@@ -19,7 +20,20 @@ _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.interna
_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
"172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
"172.30.", "172.31.", "192.168.", "100.")
"172.30.", "172.31.", "192.168.")
# Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
# A bare "100." prefix would classify public addresses (e.g. AWS ranges
# under 100.x outside the CGNAT block) as local; routes/model_routes.py
# already narrows this the same way for endpoint classification.
_TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
def _in_tailscale_range(host: str) -> bool:
try:
return ipaddress.ip_address(host) in _TAILSCALE_CGNAT
except ValueError:
return False
def _normalize_base_for_compare(url: str) -> str:
@@ -64,7 +78,7 @@ def _configured_endpoint_kind(url: str) -> Optional[str]:
return None
def _is_local_endpoint(url: str) -> bool:
def is_local_endpoint(url: str) -> bool:
"""Check if URL points to a local/private/tailscale address."""
kind = _configured_endpoint_kind(url)
if kind in ("api", "proxy"):
@@ -73,7 +87,7 @@ def _is_local_endpoint(url: str) -> bool:
return True
try:
host = urlparse(url).hostname or ""
return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES)
return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
except Exception:
return False
@@ -208,7 +222,30 @@ KNOWN_CONTEXT_WINDOWS = {
# ---------------------------------------------------------------------------
# Cache
# ---------------------------------------------------------------------------
_context_cache: Dict[Tuple[str, str], int] = {}
_context_cache: Dict[Tuple[str, str], Tuple[int, bool]] = {}
def _get_context_length_cached(endpoint_url: str, model: str) -> Tuple[int, bool]:
"""Return (context_length, known). ``known`` is False only when the value is a
bare DEFAULT_CONTEXT fallback (no endpoint report and not in the known table)."""
configured_kind = _configured_endpoint_kind(endpoint_url)
is_local = is_local_endpoint(endpoint_url)
# Key on (endpoint_url, model): the same model id can be served by two
# different remote endpoints with different real context windows (e.g. a
# capped proxy vs. the full provider), so caching by model id alone would
# serve one endpoint's window for the other (issue #2603).
cache_key = (endpoint_url, model)
if not is_local and cache_key in _context_cache:
return _context_cache[cache_key]
ctx, known = _query_context_length(endpoint_url, model)
# Only cache non-default values to allow retry on next request.
# Local endpoints can restart with a different --max-model-len while keeping
# the same model id, so always re-query them instead of serving stale cache.
if not is_local and (ctx != DEFAULT_CONTEXT or configured_kind in ("api", "proxy")):
_context_cache[cache_key] = (ctx, known)
logger.info(f"Context length for {model}: {ctx}")
return ctx, known
def get_context_length(endpoint_url: str, model: str) -> int:
@@ -218,24 +255,33 @@ def get_context_length(endpoint_url: str, model: str) -> int:
or context_window fields. Caches result per (endpoint, model).
Falls back to DEFAULT_CONTEXT if unavailable.
"""
configured_kind = _configured_endpoint_kind(endpoint_url)
is_local = _is_local_endpoint(endpoint_url)
# Key on (endpoint_url, model): the same model id can be served by two
# different remote endpoints with different real context windows (e.g. a
# capped proxy vs. the full provider), so caching by model id alone would
# serve one endpoint's window for the other (issue #2603).
cache_key = (endpoint_url, model)
if not is_local and cache_key in _context_cache:
return _context_cache[cache_key]
return _get_context_length_cached(endpoint_url, model)[0]
ctx = _query_context_length(endpoint_url, model)
# Only cache non-default values to allow retry on next request.
# Local endpoints can restart with a different --max-model-len while keeping
# the same model id, so always re-query them instead of serving stale cache.
if not is_local and (ctx != DEFAULT_CONTEXT or configured_kind in ("api", "proxy")):
_context_cache[cache_key] = ctx
logger.info(f"Context length for {model}: {ctx}")
return ctx
def get_context_length_known(endpoint_url: str, model: str) -> Tuple[int, bool]:
"""Like ``get_context_length`` but also returns whether the window was actually
discovered (endpoint-reported or in the known-models table) rather than the bare
DEFAULT_CONTEXT fallback. Callers that *scale* a budget off the window must not
trust an unknown value a fallback 128K isn't proof the model holds 128K
(review on #4122)."""
return _get_context_length_cached(endpoint_url, model)
def budget_context_for_model(endpoint_url: str, model: str, *, fallback: int = 0) -> int:
"""Context window to scale the agent input budget against.
Returns the *freshly discovered* window when it was actually proven
(endpoint-reported / known table), else 0 so auto-scaling stays conservative.
Crucially this binds the ``known`` flag to the value it proves callers must
not pair this flag with a context length from a *different* lookup (a stale
local re-query, or a caller that didn't pass one), which would budget off an
unproven number (review on #4122). On probe error, returns ``fallback`` (the
caller's best-known value) to preserve prior behaviour."""
try:
ctx, known = get_context_length_known(endpoint_url, model)
return ctx if known else 0
except Exception:
return fallback
def _lookup_known(model: str) -> Optional[int]:
@@ -257,8 +303,9 @@ def _lookup_known(model: str) -> Optional[int]:
return best_ctx
def _query_context_length(endpoint_url: str, model: str) -> int:
"""Query the model API for context length."""
def _query_context_length(endpoint_url: str, model: str) -> Tuple[int, bool]:
"""Query the model API for context length. Returns (context_length, known) where
``known`` is False only for the bare DEFAULT_CONTEXT fallback."""
known = _lookup_known(model)
api_ctx = None
configured_kind = _configured_endpoint_kind(endpoint_url)
@@ -269,11 +316,11 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
if configured_kind in ("api", "proxy"):
if known:
logger.info(f"Using known context window for {model}: {known}")
return known
return DEFAULT_CONTEXT
return known, True
return DEFAULT_CONTEXT, False
# Try llama.cpp /slots endpoint first — reports actual serving context
if _is_local_endpoint(endpoint_url):
if is_local_endpoint(endpoint_url):
try:
base = endpoint_url.split("/v1")[0] if "/v1" in endpoint_url else endpoint_url.rsplit("/", 1)[0]
r = httpx.get(f"{base}/slots", timeout=REQUEST_TIMEOUT)
@@ -283,7 +330,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
n_ctx = slots[0].get("n_ctx")
if n_ctx and isinstance(n_ctx, int) and n_ctx > 0:
logger.info(f"llama.cpp /slots reports n_ctx={n_ctx} for {model}")
return n_ctx
return n_ctx, True
except Exception:
pass
@@ -295,7 +342,8 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
if is_copilot_base(endpoint_url):
if known:
logger.info(f"Using known context window for {model}: {known}")
return known or DEFAULT_CONTEXT
return known, True
return DEFAULT_CONTEXT, False
from src.endpoint_resolver import build_models_url
@@ -337,21 +385,21 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
# For local/self-hosted endpoints, trust the API value (user set --max-model-len)
# For cloud APIs, use the larger value (API can report low defaults)
if api_ctx and known:
_is_local = _is_local_endpoint(endpoint_url)
_is_local = is_local_endpoint(endpoint_url)
if _is_local and api_ctx < known:
logger.info(f"Local endpoint reports {api_ctx} for {model} (known max: {known}) — using API value")
return api_ctx
return api_ctx, True
result = max(api_ctx, known)
if api_ctx < known:
logger.info(f"API reported {api_ctx} for {model}, using known {known} instead")
return result
return result, True
if api_ctx:
return api_ctx
return api_ctx, True
if known:
logger.info(f"Using known context window for {model}: {known}")
return known
return known, True
return DEFAULT_CONTEXT
return DEFAULT_CONTEXT, False
def estimate_tokens(messages: List[Dict]) -> int:
+19
View File
@@ -223,6 +223,25 @@ class ModelDiscovery:
)
return {"hosts": hosts, "items": items}
def warmup_ping_urls(self, limit: int = 5) -> List[str]:
"""The ``/models`` URLs of up to ``limit`` discovered endpoints.
Used by the startup warmup / keepalive loop to prime connections. Each
discovered item already carries a ``/v1/chat/completions`` url; swap the
suffix for the cheap ``/models`` probe. Failures degrade to an empty list
so warmup never crashes the caller.
"""
try:
items = (self.discover_models() or {}).get("items", [])
except Exception:
return []
urls: List[str] = []
for ep in items[:limit]:
url = (ep.get("url") or "").replace("/chat/completions", "/models")
if url:
urls.append(url)
return urls
def get_providers(self) -> Dict[str, Any]:
"""Get all available providers"""
discovery = self.discover_models()
+1 -1
View File
@@ -32,7 +32,7 @@ def create_office_document(
DocumentVersion,
Session as DbSession,
)
from src.tool_implementations import set_active_document
from src.agent_tools.document_tools import set_active_document
if not body_text or not body_text.strip():
return None
+32
View File
@@ -0,0 +1,32 @@
"""Compatibility helpers for optional third-party dependencies."""
from __future__ import annotations
import sys
import types
def patch_realesrgan_torchvision_compat() -> None:
"""Restore the torchvision import path expected by BasicSR/Real-ESRGAN."""
module_name = "torchvision.transforms.functional_tensor"
if module_name in sys.modules:
return
try:
from torchvision.transforms import functional
except Exception:
return
rgb_to_grayscale = getattr(functional, "rgb_to_grayscale", None)
if rgb_to_grayscale is None:
return
shim = types.ModuleType(module_name)
shim.rgb_to_grayscale = rgb_to_grayscale
shim.__getattr__ = lambda name: getattr(functional, name)
sys.modules[module_name] = shim
def prepare_optional_dependency_import(name: str) -> None:
"""Apply known import-time compatibility shims before probing a package."""
if name == "realesrgan":
patch_realesrgan_torchvision_compat()
+2 -2
View File
@@ -219,7 +219,7 @@ def create_plain_pdf_document(
pages without form-field overlays.
"""
from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession
from src.tool_implementations import set_active_document
from src.agent_tools.document_tools import set_active_document
content = render_plain_pdf_markdown(upload_id, title, body_text)
db = SessionLocal()
@@ -402,7 +402,7 @@ def create_form_markdown_document(
inside the content, which the export route looks for.
"""
from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession
from src.tool_implementations import set_active_document
from src.agent_tools.document_tools import set_active_document
content = render_form_as_markdown(fields, upload_id, title, intro_text=intro_text)
db = SessionLocal()
+24 -1
View File
@@ -221,6 +221,22 @@ class ResearchHandler:
# Task registry — background research with persistence
# ------------------------------------------------------------------
def rename_owner(self, old_owner: str, new_owner: str) -> int:
"""Move in-flight research tasks from one owner key to another."""
old_key = str(old_owner or "").strip().lower()
new_key = str(new_owner or "").strip().lower()
if not old_key or not new_key:
return 0
changed = 0
for entry in list(self._active_tasks.values()):
if not isinstance(entry, dict):
continue
if str(entry.get("owner", "")).strip().lower() == old_key:
entry["owner"] = new_key
changed += 1
return changed
def start_research(
self,
session_id: str,
@@ -390,7 +406,6 @@ class ResearchHandler:
def get_status(self, session_id: str) -> Optional[dict]:
"""Get current research status for a session."""
avg = self.get_avg_duration()
if session_id in self._active_tasks:
entry = self._active_tasks[session_id]
result = {
@@ -399,6 +414,14 @@ class ResearchHandler:
"query": entry["query"],
"started_at": entry["started_at"],
}
# avg_duration is a historical figure over completed reports on
# disk; get_avg_duration() globs and JSON-parses the whole research
# dir, so compute it at most once per active stream (memoized on the
# entry) instead of on every ~1s SSE poll. The disk branch below
# never used it, so it no longer pays that cost at all.
if "_avg_duration" not in entry:
entry["_avg_duration"] = self.get_avg_duration()
avg = entry["_avg_duration"]
if avg is not None:
result["avg_duration"] = round(avg, 1)
return result

Some files were not shown because too many files have changed in this diff Show More