mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 18:25:26 -04:00
Merge remote-tracking branch 'origin/dev' into test-main-dev-merge-20260615
# Conflicts: # src/tool_implementations.py # static/js/research/panel.js
This commit is contained in:
@@ -10,6 +10,12 @@ dist/
|
||||
build/
|
||||
.env
|
||||
.env.bak.*
|
||||
# Secrets: keep plaintext and every transient secrets.env variant out of
|
||||
# the build context. If an encrypted secrets.env is used, it is mounted
|
||||
# at runtime — never baked into the image. Mirrored in .gitignore.
|
||||
secrets.env
|
||||
secrets.env.*
|
||||
!secrets.env.example
|
||||
/data/
|
||||
/logs/
|
||||
.git/
|
||||
|
||||
@@ -190,3 +190,10 @@ SEARXNG_INSTANCE=http://localhost:8080
|
||||
# These overlays only expose the GPU devices. The slim Odysseus image
|
||||
# still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,
|
||||
# llama-cpp-python, etc.) before models can actually serve on GPU.
|
||||
|
||||
# ============================================================
|
||||
# Storage Paths (Docker Compose)
|
||||
# ============================================================
|
||||
|
||||
# APP_DATA_DIR=./data
|
||||
# APP_LOGS_DIR=./logs
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
# Code owners.
|
||||
#
|
||||
# Intentionally empty for now. The catch-all rule that mapped every path to a
|
||||
# single owner froze all merges the moment "Require review from Code Owners"
|
||||
# was enabled, because no other maintainer's approval could satisfy the gate.
|
||||
# A per-area ownership map (security/auth, CI, frontend, agent internals, with
|
||||
# multiple named owners per line) is being worked out in issue #593; once
|
||||
# agreed it replaces this file. Until then, required reviews and the security
|
||||
# CI gate (docs/security-ci.md) remain in force via branch protection.
|
||||
@@ -0,0 +1,48 @@
|
||||
# Dependabot keeps dependencies and pinned action versions current.
|
||||
#
|
||||
# Why this matters for security: every workflow in this repo pins its GitHub
|
||||
# Actions to an exact commit (a SHA), which is safe but freezes them in time.
|
||||
# Dependabot opens a small, reviewable pull request whenever a newer version
|
||||
# exists -- for Python packages, npm packages, the Docker base image, and the
|
||||
# pinned Actions themselves -- so staying patched does not require manual work.
|
||||
# Updates are grouped so a week's bumps arrive as one PR per ecosystem, not a
|
||||
# flood of separate ones.
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
# Python dependencies (requirements.txt + requirements-optional.txt).
|
||||
- package-ecosystem: pip
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
groups:
|
||||
python:
|
||||
patterns: ["*"]
|
||||
|
||||
# Frontend / tooling npm packages (package.json).
|
||||
- package-ecosystem: npm
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
groups:
|
||||
npm:
|
||||
patterns: ["*"]
|
||||
|
||||
# The pinned action SHAs used across .github/workflows.
|
||||
- package-ecosystem: github-actions
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
groups:
|
||||
actions:
|
||||
patterns: ["*"]
|
||||
|
||||
# The Docker base image in the Dockerfile.
|
||||
- package-ecosystem: docker
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: weekly
|
||||
open-pull-requests-limit: 5
|
||||
@@ -0,0 +1,123 @@
|
||||
# Pull Request Review Template
|
||||
|
||||
Use this shape as a copyable reference for substantive PR reviews; GitHub does
|
||||
not auto-apply this file to review comments. Omit sections that do not add
|
||||
useful signal. Lead with confirmed findings; keep speculative notes out of the
|
||||
public review unless they are framed as a concrete open question.
|
||||
|
||||
## Small PR Path
|
||||
|
||||
For narrow docs, typo, test-only, or obvious local fixes, a short review is
|
||||
enough:
|
||||
|
||||
```md
|
||||
LGTM after checking:
|
||||
- scope:
|
||||
- validation:
|
||||
- residual risk:
|
||||
```
|
||||
|
||||
Use the fuller structure below for larger, risky, multi-finding, or
|
||||
security-sensitive reviews.
|
||||
|
||||
## Findings
|
||||
|
||||
**<sub><sub></sub></sub> issue (test): Short issue title**
|
||||
|
||||
- **Problem:** Concrete broken flow, contract, input, or risk.
|
||||
|
||||
- **Impact:** Why this matters to users, CI, maintainers, data, security, or scale.
|
||||
|
||||
- **Ask:** Smallest practical correction or decision the author should make.
|
||||
|
||||
- **Location:** `path:line`
|
||||
|
||||
## Open Questions
|
||||
|
||||
- **question (scope, non-blocking): Short author question** Ask the concrete
|
||||
intent, scope, or tradeoff question.
|
||||
|
||||
## Validation
|
||||
|
||||
- Ran:
|
||||
- Not run:
|
||||
- Residual risk:
|
||||
|
||||
## PR Hygiene
|
||||
|
||||
- Target/template/checks:
|
||||
- Related, duplicate, or superseding context:
|
||||
|
||||
## No Findings Variant
|
||||
|
||||
```md
|
||||
## Findings
|
||||
|
||||
none confirmed
|
||||
|
||||
## Validation
|
||||
|
||||
- Ran:
|
||||
- Not run:
|
||||
- Residual risk:
|
||||
```
|
||||
|
||||
## Legend
|
||||
|
||||
- **Findings:** Verified, author-actionable issues that should be fixed or
|
||||
consciously accepted before merge.
|
||||
- **Priority badges:** The shields.io badges below are optional formatting for
|
||||
priority labels. Plain `P0`, `P1`, `P2`, or `P3` text is also acceptable when
|
||||
an external image dependency is undesirable or may not render.
|
||||
- **P0:** `![P0](https://img.shields.io/badge/P0-red)` -
|
||||
release-blocking or actively dangerous.
|
||||
- **P1:** `![P1](https://img.shields.io/badge/P1-orange)` -
|
||||
serious bug, security risk, data-loss risk, or broken primary flow.
|
||||
- **P2:** `![P2](https://img.shields.io/badge/P2-yellow)` -
|
||||
meaningful correctness, test, maintainability, or edge-case issue.
|
||||
- **P3:** `![P3](https://img.shields.io/badge/P3-blue)` -
|
||||
minor polish or low-risk cleanup.
|
||||
- **Intent labels:**
|
||||
- **`issue`:** A confirmed defect, regression, broken contract, or concrete
|
||||
risk.
|
||||
- **`suggestion`:** A non-blocking improvement that would make the PR clearer,
|
||||
safer, or easier to maintain.
|
||||
- **`nit`:** A tiny, non-blocking cleanup or style note. Use it only when the
|
||||
author can safely ignore it without changing the review outcome.
|
||||
- **`question`:** A real author-facing clarification about intent, scope, or
|
||||
tradeoffs. Do not use questions to hide an issue that should be stated
|
||||
directly.
|
||||
- **`LGTM`:** "Looks good to me." Use only when the review found no blocking
|
||||
issues, or when any remaining notes are clearly optional.
|
||||
- **Decorations:** Optional labels in parentheses that clarify the finding type,
|
||||
scope, or merge impact.
|
||||
- **`security`:** Auth, authorization, ownership, secrets, SSRF, injection,
|
||||
unsafe external input, or other trust-boundary concerns.
|
||||
- **`test`:** Missing, failing, misleading, brittle, or insufficient tests.
|
||||
- **`scope`:** PR scope, feature boundaries, unrelated churn, or work that
|
||||
should be split into a separate issue or PR.
|
||||
- **`ci`:** CI configuration, workflow failures, flaky checks, or validation
|
||||
signal quality.
|
||||
- **`api`:** Route, request/response, public function, schema, persistence, or
|
||||
integration contract changes.
|
||||
- **`docs`:** User-facing docs, contributor docs, examples, or comments that
|
||||
need to change with the code.
|
||||
- **`non-blocking`:** Useful feedback that should not prevent merge by
|
||||
itself.
|
||||
- **Finding fields:**
|
||||
- **Problem:** What is wrong, what contract is ambiguous, or what risk the PR
|
||||
introduces.
|
||||
- **Impact:** Why the problem matters in practical terms.
|
||||
- **Ask:** The smallest concrete fix, test, or decision requested from the PR
|
||||
author.
|
||||
- **Location:** The most useful repo-relative file and line reference for the
|
||||
finding, using `path:line`.
|
||||
- **Optional sections:**
|
||||
- **Open Questions:** Genuine scope or intent questions; omit when there are
|
||||
no real questions.
|
||||
- **Validation:** What the reviewer ran, what was intentionally not run, and
|
||||
what risk remains after review.
|
||||
- **PR Hygiene:** Target-branch, template, CI/check, duplicate, related-work,
|
||||
or superseding-PR notes.
|
||||
- **`none confirmed`:** Use only when no review-worthy findings were confirmed;
|
||||
still list validation gaps or residual risk when relevant.
|
||||
@@ -19,10 +19,10 @@ jobs:
|
||||
name: Python syntax (compileall)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# Byte-compile sources — catches syntax errors without installing deps.
|
||||
@@ -32,10 +32,10 @@ jobs:
|
||||
name: JS syntax (node --check)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: "20"
|
||||
# Syntax-check our own JS (skip vendored libs in static/lib).
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# ROADMAP "fresh install smoke tests" item; make this required once green.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
echo "docs_only=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
if: steps.docs-check.outputs.docs_only != 'true'
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
# CodeQL code scanning
|
||||
#
|
||||
# Purpose: GitHub's own static analysis engine reads the application source
|
||||
# (Python backend + the JavaScript frontend) and looks for real
|
||||
# vulnerabilities -- SQL/command injection, path traversal, auth mistakes,
|
||||
# unsafe deserialization. Findings appear in the repo's Security tab. This is
|
||||
# the deepest check in the suite and the most valuable for a high-profile
|
||||
# target.
|
||||
#
|
||||
# It runs on every push to main and on a weekly schedule (to catch newly
|
||||
# disclosed query patterns against unchanged code). It deliberately does NOT
|
||||
# run on pull requests: most PRs here come from forks, whose read-only token
|
||||
# cannot publish results, which would produce confusing failures. To scan pull
|
||||
# requests too, a maintainer can instead enable CodeQL "default setup" in
|
||||
# Settings -> Security -> Code scanning (one toggle, no file needed) -- see
|
||||
# docs/security-ci.md.
|
||||
|
||||
name: CodeQL
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
schedule:
|
||||
# Weekly, Monday 06:00 UTC.
|
||||
- cron: '0 6 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: codeql-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
security-events: write # publish results to the Security tab
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# Both are interpreted, so CodeQL needs no build step (build-mode none).
|
||||
language: [python, javascript-typescript]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: none
|
||||
|
||||
- name: Perform CodeQL analysis
|
||||
uses: github/codeql-action/analyze@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
|
||||
with:
|
||||
category: "/language:${{ matrix.language }}"
|
||||
@@ -0,0 +1,52 @@
|
||||
# Container security: Dockerfile lint
|
||||
#
|
||||
# Purpose: the Docker image is how most people run Odysseus, so it is part of
|
||||
# the attack surface. hadolint lints the Dockerfile for mistakes and insecure
|
||||
# patterns (running as root longer than needed, unpinned base image, bad apt
|
||||
# usage). Blocking.
|
||||
#
|
||||
# The image vulnerability scan (Trivy, advisory) lives in its own file,
|
||||
# container-trivy.yml. Keeping it separate lets that advisory scan be
|
||||
# path-filtered and held to a read-only token on pull requests without
|
||||
# weakening this blocking gate, which must always report so a required check
|
||||
# never hangs.
|
||||
#
|
||||
# Note: a separate open PR (#120) proposes a local `scripts/scan_image.py`.
|
||||
# This job is complementary -- it is a CI gate, not a script a contributor has
|
||||
# to remember to run.
|
||||
|
||||
name: Container scan
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: container-scan-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
hadolint:
|
||||
name: hadolint (Dockerfile lint)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Lint Dockerfile
|
||||
uses: hadolint/hadolint-action@2332a7b74a6de0dda2e2221d575162eba76ba5e5 # v3.3.0
|
||||
with:
|
||||
dockerfile: Dockerfile
|
||||
# DL3008: pinning apt package versions is impractical on a -slim base
|
||||
# image. Debian purges old package versions from its repos, so a
|
||||
# pinned version breaks future rebuilds. The base image itself is
|
||||
# what should be pinned (tracked by Dependabot's docker ecosystem).
|
||||
ignore: DL3008
|
||||
@@ -0,0 +1,125 @@
|
||||
# Container image vulnerability scan (advisory)
|
||||
#
|
||||
# Trivy builds the application image and scans it for known-vulnerable OS and
|
||||
# Python packages. Advisory only -- it reports findings to the repo's Security
|
||||
# tab without blocking a merge, because the image inevitably contains
|
||||
# already-known CVEs in upstream packages that are not this project's bug.
|
||||
#
|
||||
# Split from the Dockerfile lint (container-scan.yml) for two reasons:
|
||||
#
|
||||
# - Least privilege. The image build runs Dockerfile instructions, which on a
|
||||
# pull request are attacker-influenceable. That path (the `scan` job) is
|
||||
# held to a read-only token and never publishes results. Only `publish`,
|
||||
# which runs on push to main (curated, fast-forwarded from reviewed dev),
|
||||
# gets security-events:write to upload SARIF.
|
||||
# - Cost. Docs-only changes do not rebuild the image (paths-ignore below),
|
||||
# matching docker-publish.yml. hadolint stays on the broad trigger in
|
||||
# container-scan.yml so the blocking gate always reports.
|
||||
|
||||
name: Container scan (Trivy)
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths-ignore:
|
||||
- '**.md'
|
||||
- 'docs/**'
|
||||
- '.github/ISSUE_TEMPLATE/**'
|
||||
push:
|
||||
branches: [main]
|
||||
paths-ignore:
|
||||
- '**.md'
|
||||
- 'docs/**'
|
||||
- '.github/ISSUE_TEMPLATE/**'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: container-trivy-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
# Pull requests and manual runs: build and scan under a read-only token.
|
||||
# The build executes PR-supplied Dockerfile instructions, so this job must
|
||||
# not hold any write scope, and it does not upload to the Security tab.
|
||||
scan:
|
||||
name: Trivy (image scan, advisory)
|
||||
if: github.event_name != 'push'
|
||||
runs-on: ubuntu-latest
|
||||
# Advisory: a CVE in an upstream package must not block a PR.
|
||||
continue-on-error: true
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Buildx
|
||||
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
|
||||
|
||||
# Build without pushing so a broken Dockerfile is caught here, and the
|
||||
# exact image we ship is what gets scanned.
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
push: false
|
||||
load: true
|
||||
tags: odysseus:ci
|
||||
|
||||
- name: Scan image with Trivy
|
||||
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
||||
with:
|
||||
image-ref: odysseus:ci
|
||||
format: table
|
||||
ignore-unfixed: true
|
||||
env:
|
||||
# Pin the vuln DB source to GHCR to avoid rate-limited Docker Hub
|
||||
# mirrors that flake on shared runners.
|
||||
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
|
||||
|
||||
# Push to main only: build, scan, and publish SARIF to the Security tab.
|
||||
# This is the only path that runs trusted code, so it is the only one granted
|
||||
# security-events:write.
|
||||
publish:
|
||||
name: Trivy (image scan + SARIF upload)
|
||||
if: github.event_name == 'push'
|
||||
runs-on: ubuntu-latest
|
||||
continue-on-error: true
|
||||
permissions:
|
||||
contents: read
|
||||
security-events: write # upload SARIF to the Security tab
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Buildx
|
||||
uses: docker/setup-buildx-action@d7f5e7f509e45cec5c76c4d5afdd7de93d0b3df5 # v4.1.0
|
||||
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@f9f3042f7e2789586610d6e8b85c8f03e5195baf # v7.2.0
|
||||
with:
|
||||
context: .
|
||||
push: false
|
||||
load: true
|
||||
tags: odysseus:ci
|
||||
|
||||
- name: Scan image with Trivy
|
||||
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # v0.36.0
|
||||
with:
|
||||
image-ref: odysseus:ci
|
||||
format: sarif
|
||||
output: trivy-results.sarif
|
||||
ignore-unfixed: true
|
||||
env:
|
||||
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
|
||||
|
||||
- name: Upload Trivy results
|
||||
uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
|
||||
with:
|
||||
sarif_file: trivy-results.sarif
|
||||
category: trivy-image
|
||||
@@ -0,0 +1,71 @@
|
||||
# Supply-chain review
|
||||
#
|
||||
# Purpose: defend against "side-chain" / supply-chain attacks -- a pull request
|
||||
# that adds (or bumps) a dependency to a version with a known vulnerability or a
|
||||
# disallowed license. Two layers:
|
||||
#
|
||||
# - dependency-review: runs ONLY on pull requests. It compares the
|
||||
# dependencies before and after the PR and blocks the merge if the change
|
||||
# pulls in a package with a known security advisory. This is the gate.
|
||||
# - pip-audit: scans the project's current Python requirements against the
|
||||
# advisory database. Advisory only (it never blocks a merge), because it can
|
||||
# flag a pre-existing issue in an already-shipped dependency.
|
||||
|
||||
name: Dependency review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
# Default-deny token; jobs grant only read access.
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: dependency-review-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
dependency-review:
|
||||
name: dependency-review (PR gate)
|
||||
# Only meaningful on a pull request -- it needs a base..head diff to review.
|
||||
if: github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Review dependency changes
|
||||
uses: actions/dependency-review-action@a1d282b36b6f3519aa1f3fc636f609c47dddb294 # v5.0.0
|
||||
with:
|
||||
# Fail the PR on any newly introduced moderate-or-worse advisory.
|
||||
fail-on-severity: moderate
|
||||
|
||||
pip-audit:
|
||||
name: pip-audit (advisory)
|
||||
runs-on: ubuntu-latest
|
||||
# Advisory: report known-vulnerable Python deps without blocking the merge.
|
||||
continue-on-error: true
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Run pip-audit on requirements
|
||||
run: |
|
||||
set -euo pipefail
|
||||
pip install pip-audit==2.10.0
|
||||
pip-audit -r requirements.txt -r requirements-optional.txt --strict
|
||||
@@ -0,0 +1,60 @@
|
||||
# Secret scanning
|
||||
#
|
||||
# Purpose: stop credentials (API keys, tokens, passwords, private keys) from
|
||||
# ever living in the Git history. Odysseus deliberately keeps real secrets in
|
||||
# files that are gitignored (.env, data/), but a slip in a future commit -- or a
|
||||
# malicious pull request that sneaks one in -- would otherwise go unnoticed.
|
||||
# This job reads the repository and the full commit history and fails if it
|
||||
# finds anything that looks like a secret.
|
||||
#
|
||||
# It runs the official gitleaks BINARY directly (pinned to an exact version and
|
||||
# verified against the project's published SHA-256 checksum) rather than the
|
||||
# gitleaks GitHub Action, because the Action asks for a paid license on
|
||||
# organization-owned repos. The binary is free and behaves identically.
|
||||
|
||||
name: Secret scan
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
# Start with zero permissions; the single job opts back in to read-only.
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: secret-scan-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
gitleaks:
|
||||
name: gitleaks
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
# Full history so a secret committed in an earlier commit (and later
|
||||
# deleted) is still caught -- deletion does not remove it from Git.
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
|
||||
# Pinned version + checksum so a tampered release binary cannot run here.
|
||||
# Bump VERSION/SHA256 together; the checksum comes from the matching
|
||||
# gitleaks_<version>_checksums.txt on the GitHub release.
|
||||
- name: Run gitleaks (pinned, checksum-verified)
|
||||
env:
|
||||
GITLEAKS_VERSION: 8.30.1
|
||||
GITLEAKS_SHA256: 551f6fc83ea457d62a0d98237cbad105af8d557003051f41f3e7ca7b3f2470eb
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TARBALL="gitleaks_${GITLEAKS_VERSION}_linux_x64.tar.gz"
|
||||
curl -fsSL -o "${TARBALL}" \
|
||||
"https://github.com/gitleaks/gitleaks/releases/download/v${GITLEAKS_VERSION}/${TARBALL}"
|
||||
echo "${GITLEAKS_SHA256} ${TARBALL}" | sha256sum -c -
|
||||
tar -xzf "${TARBALL}" gitleaks
|
||||
# Scan the whole history. Findings print to the log and fail the job.
|
||||
./gitleaks git --no-banner --redact --verbose .
|
||||
@@ -0,0 +1,80 @@
|
||||
# Workflow security (CI that audits the CI)
|
||||
#
|
||||
# Purpose: the GitHub Actions workflows themselves are an attack surface. A
|
||||
# poorly written workflow can leak the repository token, run attacker-supplied
|
||||
# code from a pull request, or pull in a tampered third-party action. These two
|
||||
# tools check every workflow file in this repo for those mistakes:
|
||||
#
|
||||
# - actionlint: catches workflow syntax errors and shell-script bugs inside
|
||||
# `run:` steps before they reach main.
|
||||
# - zizmor: a security linter for Actions. Flags template-injection holes,
|
||||
# unpinned actions, credential persistence, and over-broad token
|
||||
# permissions -- exactly the patterns the rest of this CI is built to avoid.
|
||||
#
|
||||
# Add this early: it then audits every workflow added after it.
|
||||
|
||||
name: Workflow security
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches: [main]
|
||||
workflow_dispatch:
|
||||
|
||||
# Default-deny token; each job grants only read access to the code.
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: workflow-security-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
actionlint:
|
||||
name: actionlint
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
# Pinned version + checksum so a tampered binary cannot run here.
|
||||
- name: Run actionlint (pinned, checksum-verified)
|
||||
env:
|
||||
ACTIONLINT_VERSION: 1.7.12
|
||||
ACTIONLINT_SHA256: 8aca8db96f1b94770f1b0d72b6dddcb1ebb8123cb3712530b08cc387b349a3d8
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TARBALL="actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
|
||||
curl -fsSL -o "${TARBALL}" \
|
||||
"https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/${TARBALL}"
|
||||
echo "${ACTIONLINT_SHA256} ${TARBALL}" | sha256sum -c -
|
||||
tar -xzf "${TARBALL}" actionlint
|
||||
./actionlint -color
|
||||
|
||||
zizmor:
|
||||
name: zizmor (Actions SAST)
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
# Pinned zizmor release. --offline keeps the audit hermetic (no network
|
||||
# calls about the actions it inspects); --min-severity=low surfaces
|
||||
# everything so nothing slips through under the gate.
|
||||
- name: Run zizmor
|
||||
run: |
|
||||
set -euo pipefail
|
||||
pip install zizmor==1.25.2
|
||||
zizmor --offline --min-severity=low .github/workflows/
|
||||
+13
@@ -14,6 +14,15 @@ venv/
|
||||
.env
|
||||
.env.bak.*
|
||||
!.env.example
|
||||
# Local uv lockfile (optional, per-platform — see "Faster, reproducible installs with uv" in README)
|
||||
requirements.lock
|
||||
|
||||
# SOPS workflow — encrypted `secrets.env` is intentionally committable,
|
||||
# but every variant (plaintext, manual decrypt copy, editor backup)
|
||||
# must stay out of git. Mirrored in .dockerignore so the same artifacts
|
||||
# also cannot enter image build layers.
|
||||
secrets.env.*
|
||||
!secrets.env.example
|
||||
|
||||
# Data — all user data stays local
|
||||
data/
|
||||
@@ -61,6 +70,9 @@ output.txt.txt
|
||||
*.tiff
|
||||
*.pdf
|
||||
|
||||
# …except shipped static assets
|
||||
!static/icons/*.png
|
||||
|
||||
# …except shipped demo assets in docs/ that the README links to.
|
||||
!docs/*.jpg
|
||||
!docs/*.jpeg
|
||||
@@ -89,3 +101,4 @@ docs/windows-port/
|
||||
compound.config.json
|
||||
*.error.log
|
||||
_scratch/
|
||||
/odysseus/
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
FROM python:3.12-slim
|
||||
FROM python:3.14-slim
|
||||
|
||||
# System deps. tmux is required by Cookbook for background downloads/serves.
|
||||
# openssh-client is required for Cookbook remote server tests, setup, probes,
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
|
||||
A self-hosted AI workspace -- meant to be the self-hosted version of the UI experience you get from ChatGPT and Claude. But with more jank and fun. Running on your own hardware, with your own data -- local-first, privacy-first, and no trojan.
|
||||
|
||||
[![Packaging status](https://repology.org/badge/vertical-allrepos/odysseus-ai.svg)](https://repology.org/project/odysseus-ai/versions)
|
||||
|
||||
## Features
|
||||
- **Chat** -- chat with any local model or API; adding them is super simple.<br> <sub>vLLM · llama.cpp · Ollama · OpenRouter · OpenAI · GitHub Copilot</sub>
|
||||
- **Agent** -- hand it tools and let it run the whole task itself.<br> <sub>built on [opencode](https://github.com/anomalyco/opencode) · MCP · web · files · shell · skills · memory</sub>
|
||||
@@ -73,6 +75,10 @@ binds the web UI to `127.0.0.1` by default. If the port is taken, set
|
||||
`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
|
||||
only when you intentionally want LAN/reverse-proxy access.
|
||||
|
||||
> **On Apple Silicon (M-series) Macs:** Docker can't reach the Metal GPU, so
|
||||
> Cookbook serves local models on CPU only. For GPU-accelerated model serving,
|
||||
> run natively instead — see [Apple Silicon](#apple-silicon) below.
|
||||
|
||||
### Native Linux / macOS
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
@@ -218,7 +224,7 @@ docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls
|
||||
> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
|
||||
> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
|
||||
> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
|
||||
> not a Docker passthrough failure. Re-install the serve engine via
|
||||
> not a Docker passthrough failure. Reinstall the serve engine via
|
||||
> **Cookbook → Dependencies** to get a CUDA-enabled build.
|
||||
>
|
||||
> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
|
||||
@@ -329,10 +335,29 @@ To expose Odysseus on a local network or Tailscale with HTTPS:
|
||||
| Package | Feature unlocked |
|
||||
|---------|-----------------|
|
||||
| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
|
||||
| `duckduckgo-search` | DuckDuckGo as a search provider option. |
|
||||
| `ddgs` | DuckDuckGo as a search provider option. |
|
||||
| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
|
||||
| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
|
||||
|
||||
### Faster, reproducible installs with uv (optional)
|
||||
[uv](https://docs.astral.sh/uv/) works as a drop-in replacement for the
|
||||
venv + pip steps in the native install guides. No project changes are needed; you get faster installs and, optionally, a lockfile for reproducible environments. After [installing `uv`](https://docs.astral.sh/uv/getting-started/installation/), use:
|
||||
|
||||
```bash
|
||||
uv venv venv --python 3.13
|
||||
uv pip install -r requirements.txt
|
||||
# then continue as usual: python setup.py, uvicorn, ...
|
||||
```
|
||||
|
||||
`requirements.txt` is intentionally unpinned, so two installs at different times can produce different package versions. If you want a reproducible environment (e.g. across your own machines, or to roll back after a bad upgrade), snapshot and restore exact versions with:
|
||||
|
||||
```bash
|
||||
uv pip compile requirements.txt -o requirements.lock # snapshot current resolution
|
||||
uv pip sync requirements.lock # reproduce it exactly later
|
||||
```
|
||||
|
||||
`requirements.lock` is gitignored and platform-specific (compile it on the OS you deploy to). Regenerate it deliberately when you want to take upgrades. The plain `uv pip install -r requirements.txt` keeps following the unpinned requirements like pip does.
|
||||
|
||||
### Outlook / Office 365 email
|
||||
Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook
|
||||
and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox
|
||||
@@ -364,6 +389,7 @@ Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and th
|
||||
4. Keep raw service and model ports internal-only.
|
||||
|
||||
Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
|
||||
`ALLOWED_ORIGINS` lists exact permitted origins for cross-origin browser/API clients; ordinary same-origin reverse-proxy access usually does not need a special CORS entry.
|
||||
|
||||
Common internal-only ports from the default docs/compose setup:
|
||||
|
||||
@@ -395,8 +421,11 @@ Key settings:
|
||||
| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
|
||||
| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
|
||||
| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
|
||||
| `APP_DATA_DIR` | `./data` | Docker Compose host directory for application data volumes. |
|
||||
| `APP_LOGS_DIR` | `./logs` | Docker Compose host directory for application logs. |
|
||||
| `AUTH_ENABLED` | `true` | Enable/disable login |
|
||||
| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
|
||||
| `ALLOWED_ORIGINS` | `http://localhost,http://127.0.0.1` | Comma-separated exact permitted origins for cross-origin browser/API clients. |
|
||||
| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
|
||||
| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
|
||||
| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
|
||||
@@ -440,6 +469,9 @@ docs/ landing page (index.html) + preview clips
|
||||
All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
|
||||
`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
|
||||
|
||||
To back up or restore everything in `data/`, see the
|
||||
[Backup & Restore guide](docs/backup-restore.md).
|
||||
|
||||
## Star History
|
||||
|
||||
<a href="https://www.star-history.com/?repos=pewdiepie-archdaemon%2Fodysseus&type=date&legend=top-left">
|
||||
|
||||
@@ -47,6 +47,7 @@ from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.middleware.gzip import GZipMiddleware
|
||||
|
||||
# Core imports
|
||||
from core.constants import (
|
||||
@@ -55,7 +56,7 @@ from core.constants import (
|
||||
)
|
||||
from core.database import SessionLocal, ApiToken
|
||||
from core.middleware import SecurityHeadersMiddleware, is_cors_preflight
|
||||
from core.auth import AuthManager
|
||||
from core.auth import AuthManager, normalize_known_username
|
||||
from core.exceptions import (
|
||||
SessionNotFoundError, InvalidFileUploadError,
|
||||
LLMServiceError, WebSearchError,
|
||||
@@ -68,10 +69,37 @@ from src.generated_images import GENERATED_IMAGE_HEADERS, resolve_generated_imag
|
||||
from starlette.responses import RedirectResponse
|
||||
|
||||
# ========= LOGGING =========
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
)
|
||||
import logging.handlers
|
||||
from core.constants import DATA_DIR
|
||||
|
||||
_root_logger = logging.getLogger()
|
||||
_root_logger.setLevel(logging.INFO)
|
||||
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
||||
# Clear existing handlers to avoid duplicates
|
||||
for _h in list(_root_logger.handlers):
|
||||
_root_logger.removeHandler(_h)
|
||||
|
||||
_console_h = logging.StreamHandler()
|
||||
_console_h.setFormatter(_formatter)
|
||||
_root_logger.addHandler(_console_h)
|
||||
|
||||
try:
|
||||
_log_dir = os.path.join(DATA_DIR, "logs")
|
||||
os.makedirs(_log_dir, exist_ok=True)
|
||||
_log_file = os.path.join(_log_dir, "app.log")
|
||||
|
||||
# RotatingFileHandler is not multi-process safe (e.g. if uvicorn is run with --workers N).
|
||||
# Odysseus is single-process by convention, so this is acceptable, but be aware that
|
||||
# concurrent log rotation issues can arise if multiple workers are configured.
|
||||
_file_h = logging.handlers.RotatingFileHandler(
|
||||
_log_file, maxBytes=5 * 1024 * 1024, backupCount=3, encoding="utf-8"
|
||||
)
|
||||
_file_h.setFormatter(_formatter)
|
||||
_root_logger.addHandler(_file_h)
|
||||
except Exception as e:
|
||||
_root_logger.warning(f"Failed to initialize file logging handler (falling back to console-only): {e}")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ========= APP =========
|
||||
@@ -104,6 +132,16 @@ app.add_middleware(
|
||||
],
|
||||
)
|
||||
|
||||
# ========= RESPONSE COMPRESSION (gzip) =========
|
||||
# The frontend's text assets (style.css, index.html, the JS bundles) shipped
|
||||
# uncompressed on every cold load. gzip cuts CSS/JS/HTML by ~75-85% on the wire
|
||||
# with no behavioural change. Starlette's GZipMiddleware excludes
|
||||
# `text/event-stream` by default, so the SSE streams (chat, shell, research,
|
||||
# model-probe — all served with media_type="text/event-stream") are never
|
||||
# compressed or buffered; only complete bodies over minimum_size are. The
|
||||
# security-header middleware composes cleanly on top.
|
||||
app.add_middleware(GZipMiddleware, minimum_size=1024, compresslevel=6)
|
||||
|
||||
# ========= SECURITY HEADERS MIDDLEWARE =========
|
||||
app.add_middleware(SecurityHeadersMiddleware)
|
||||
|
||||
@@ -129,6 +167,7 @@ _TIMEOUT_EXEMPT_PREFIXES = (
|
||||
"/api/cookbook/setup", # remote pacman/apt installs
|
||||
"/api/upload", # large files
|
||||
"/api/image", # diffusion proxies (inpaint/harmonize/upscale/etc.) — own 120s httpx timeout
|
||||
"/api/memory/audit", # retains own 120s LLM inactivity timeout
|
||||
)
|
||||
|
||||
|
||||
@@ -217,8 +256,16 @@ if AUTH_ENABLED:
|
||||
try:
|
||||
rows = db.query(ApiToken).filter(ApiToken.is_active == True).all()
|
||||
for r in rows:
|
||||
owner_key = normalize_known_username(auth_manager.users, getattr(r, "owner", None))
|
||||
if not owner_key:
|
||||
logger.warning(
|
||||
"Ignoring active API token '%s' for unknown auth user '%s'",
|
||||
getattr(r, "id", ""),
|
||||
getattr(r, "owner", None),
|
||||
)
|
||||
continue
|
||||
scopes = [s.strip() for s in (getattr(r, "scopes", "") or "chat").split(",") if s.strip()]
|
||||
new_map[r.token_prefix].append((r.id, r.token_hash, getattr(r, "owner", None), scopes))
|
||||
new_map[r.token_prefix].append((r.id, r.token_hash, owner_key, scopes))
|
||||
finally:
|
||||
db.close()
|
||||
_token_cache.clear()
|
||||
@@ -472,14 +519,20 @@ components = initialize_managers(BASE_DIR, rag_manager)
|
||||
session_manager = components["session_manager"]
|
||||
from src.assistant_log import set_session_manager as _set_asst_sm
|
||||
_set_asst_sm(session_manager)
|
||||
# Set the global session manager singleton (used by core.models.Session.add_message)
|
||||
from core.models import set_session_manager_instance
|
||||
set_session_manager_instance(session_manager)
|
||||
app.state.session_manager = session_manager
|
||||
memory_manager = components["memory_manager"]
|
||||
memory_vector = components.get("memory_vector")
|
||||
upload_handler = components["upload_handler"]
|
||||
app.state.upload_handler = upload_handler
|
||||
personal_docs_mgr = components["personal_docs_manager"]
|
||||
api_key_manager = components["api_key_manager"]
|
||||
preset_manager = components["preset_manager"]
|
||||
chat_processor = components["chat_processor"]
|
||||
research_handler = components["research_handler"]
|
||||
app.state.research_handler = research_handler
|
||||
chat_handler = components["chat_handler"]
|
||||
model_discovery = components["model_discovery"]
|
||||
skills_manager = components["skills_manager"]
|
||||
@@ -573,7 +626,7 @@ app.include_router(setup_preset_routes(preset_manager))
|
||||
|
||||
# Diagnostics
|
||||
from routes.diagnostics_routes import setup_diagnostics_routes
|
||||
app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler))
|
||||
app.include_router(setup_diagnostics_routes(rag_manager, rag_available, research_handler, memory_vector))
|
||||
|
||||
# Cleanup
|
||||
from routes.cleanup_routes import setup_cleanup_routes
|
||||
@@ -651,6 +704,9 @@ app.include_router(setup_shell_routes())
|
||||
from routes.cookbook_routes import setup_cookbook_routes
|
||||
app.include_router(setup_cookbook_routes())
|
||||
|
||||
from routes.workspace_routes import setup_workspace_routes
|
||||
app.include_router(setup_workspace_routes())
|
||||
|
||||
# Hardware model fitting (cookbook "What Fits?" tab)
|
||||
from routes.hwfit_routes import setup_hwfit_routes
|
||||
app.include_router(setup_hwfit_routes())
|
||||
@@ -923,16 +979,21 @@ async def _startup_event():
|
||||
async def _warmup_endpoints():
|
||||
try:
|
||||
import httpx
|
||||
endpoints = model_discovery.get_endpoints() if model_discovery else []
|
||||
for ep in endpoints[:5]:
|
||||
url = ep.get("url", "").replace("/chat/completions", "/models")
|
||||
if url:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
await client.get(url)
|
||||
logger.info(f"Warmup ping OK: {url}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping failed for endpoint: {e}")
|
||||
# model_discovery has no get_endpoints(); that call raised
|
||||
# AttributeError every run and silently disabled warmup/keepalive.
|
||||
# Resolve the /models probe URLs via the real discovery API, off the
|
||||
# event loop since discovery does a blocking port scan.
|
||||
urls = (
|
||||
await asyncio.to_thread(model_discovery.warmup_ping_urls)
|
||||
if model_discovery else []
|
||||
)
|
||||
for url in urls:
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5.0) as client:
|
||||
await client.get(url)
|
||||
logger.info(f"Warmup ping OK: {url}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping failed for endpoint: {e}")
|
||||
except Exception as e:
|
||||
logger.debug(f"Warmup ping skipped: {e}")
|
||||
|
||||
|
||||
+129
-13
@@ -3,6 +3,7 @@ Authentication module — multi-user password hashing, session tokens, config pe
|
||||
Config stored in data/auth.json. Uses bcrypt directly.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -67,6 +68,14 @@ TOKEN_TTL = 60 * 60 * 24 * 7 # 7 days
|
||||
RESERVED_USERNAMES = frozenset({"internal-tool", "api", "demo", "system"})
|
||||
|
||||
|
||||
def normalize_known_username(users: Dict[str, Any], username: str | None) -> Optional[str]:
|
||||
"""Return a normalized username only when it exists in the auth user map."""
|
||||
key = str(username or "").strip().lower()
|
||||
if not key or key not in users:
|
||||
return None
|
||||
return key
|
||||
|
||||
|
||||
def _hash_password(password: str) -> str:
|
||||
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
|
||||
|
||||
@@ -75,6 +84,15 @@ def _verify_password(password: str, hashed: str) -> bool:
|
||||
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))
|
||||
|
||||
|
||||
class SetAdminResult(enum.Enum):
    """Possible outcomes of ``AuthManager.set_admin``.

    Returning a distinct member per case lets callers translate each
    outcome into a precise response rather than interpreting a bare bool.
    """

    # The request was applied (or was already satisfied).
    OK = "ok"
    # The target username does not exist.
    USER_NOT_FOUND = "user_not_found"
    # The requester is not an admin.
    NOT_AUTHORIZED = "not_authorized"
    # The change would remove the last remaining admin.
    LAST_ADMIN = "last_admin"
|
||||
|
||||
|
||||
class AuthManager:
|
||||
"""Manages multi-user password + session-token auth system."""
|
||||
|
||||
@@ -96,6 +114,7 @@ class AuthManager:
|
||||
self._load()
|
||||
self._load_sessions()
|
||||
self._migrate_single_user()
|
||||
self._drop_reserved_loaded_users()
|
||||
self._migrate_legacy_admin_role()
|
||||
|
||||
def _load(self):
|
||||
@@ -148,7 +167,13 @@ class AuthManager:
|
||||
def _migrate_single_user(self):
|
||||
"""Migrate old single-user format to multi-user format."""
|
||||
if "password_hash" in self._config and "users" not in self._config:
|
||||
old_user = self._config.get("username", "admin")
|
||||
old_user = str(self._config.get("username", "admin") or "admin").strip().lower()
|
||||
if old_user in RESERVED_USERNAMES:
|
||||
logger.warning(
|
||||
"Migrating legacy single-user reserved username '%s' to 'admin'",
|
||||
old_user,
|
||||
)
|
||||
old_user = "admin"
|
||||
old_hash = self._config["password_hash"]
|
||||
self._config = {
|
||||
"users": {
|
||||
@@ -162,6 +187,30 @@ class AuthManager:
|
||||
self._save()
|
||||
logger.info(f"Migrated single-user auth to multi-user (admin: {old_user})")
|
||||
|
||||
def _drop_reserved_loaded_users(self):
    """Rebuild the loaded user map without reserved usernames.

    Each key of the ``users`` mapping in the config is stringified,
    stripped and lowercased. Keys that end up empty are discarded, keys
    that match ``RESERVED_USERNAMES`` are discarded and reported, and all
    remaining entries are stored under their normalized key. The config
    is rewritten and saved only when the rebuilt map differs from what
    was loaded. Does nothing when ``users`` is missing or not a dict.
    """
    loaded = self._config.get("users")
    if not isinstance(loaded, dict):
        return

    kept = {}
    dropped = set()
    for raw_name, record in loaded.items():
        name = str(raw_name or "").strip().lower()
        if not name:
            continue
        if name in RESERVED_USERNAMES:
            dropped.add(name)
        else:
            kept[name] = record

    # Nothing was removed and no key changed: leave the config untouched.
    if not dropped and kept == loaded:
        return

    self._config["users"] = kept
    self._save()
    if dropped:
        logger.warning(
            "Removed reserved username(s) from auth config: %s",
            ", ".join(sorted(dropped)),
        )
|
||||
|
||||
def _migrate_legacy_admin_role(self):
|
||||
"""Normalize setup.py's old role='admin' marker to is_admin=True."""
|
||||
changed = False
|
||||
@@ -244,6 +293,22 @@ class AuthManager:
|
||||
return False
|
||||
if not self.users.get(requesting_user, {}).get("is_admin"):
|
||||
return False
|
||||
# Revoke API bearer tokens before removing the auth row. The bearer
|
||||
# path authenticates from ApiToken rows and does not require the
|
||||
# owner to still exist, so a successful delete must not leave active
|
||||
# rows behind. If the token store is unavailable, fail closed and
|
||||
# keep the user/session state intact so the admin can retry.
|
||||
try:
|
||||
from core.database import get_db_session, ApiToken
|
||||
with get_db_session() as db:
|
||||
removed_tokens = db.query(ApiToken).filter(ApiToken.owner == username).delete()
|
||||
if removed_tokens:
|
||||
logger.info(
|
||||
f"Revoked {removed_tokens} API token(s) owned by deleted user '{username}'"
|
||||
)
|
||||
except Exception:
|
||||
logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
|
||||
return False
|
||||
del self._config["users"][username]
|
||||
self._save()
|
||||
# Purge all sessions belonging to this user. validate_token doesn't
|
||||
@@ -258,18 +323,6 @@ class AuthManager:
|
||||
revoked += 1
|
||||
if revoked:
|
||||
self._save_sessions()
|
||||
# Also revoke API bearer tokens owned by this user. The bearer auth
|
||||
# path authenticates straight against ApiToken rows and never
|
||||
# re-checks that the owner still exists, so leaving the rows behind
|
||||
# would let a deleted user keep full API access indefinitely.
|
||||
try:
|
||||
from core.database import get_db_session, ApiToken
|
||||
with get_db_session() as db:
|
||||
removed = db.query(ApiToken).filter(ApiToken.owner == username).delete()
|
||||
if removed:
|
||||
logger.info(f"Revoked {removed} API token(s) owned by deleted user '{username}'")
|
||||
except Exception:
|
||||
logger.warning(f"Failed to revoke API tokens for deleted user '{username}'")
|
||||
logger.info(f"Deleted user '{username}' (by {requesting_user}); revoked {revoked} active session(s)")
|
||||
return True
|
||||
|
||||
@@ -344,6 +397,69 @@ class AuthManager:
|
||||
logger.info(f"Updated privileges for '{username}': {current}")
|
||||
return True
|
||||
|
||||
def set_admin(self, username: str, is_admin: bool,
              requesting_user: str) -> SetAdminResult:
    """Promote/demote an existing user to/from admin. Admin only.

    Refuses to remove the last remaining admin so the instance can never
    be locked out of admin access; self-demotion is allowed as long as
    another admin remains. Admin status is re-checked live on every
    request, so unlike delete/rename no session or token revocation is
    needed — a demoted admin simply fails the next is_admin() gate.

    Promotion stashes the user's current privilege map and demotion
    restores it, so a temporary admin stint can't silently broaden a
    user's non-admin access; users without a stash (created as admin,
    or promoted before stashing existed) demote to DEFAULT_PRIVILEGES.

    Counting admins and flipping the flag happen in one critical section
    so two concurrent demotions can't race the admin count to zero.

    Args:
        username: Account to change; stripped and lowercased before lookup.
        is_admin: Desired admin state; coerced with ``bool()``.
        requesting_user: Account making the change; stripped and
            lowercased, and must currently have ``is_admin`` set.

    Returns:
        ``SetAdminResult.USER_NOT_FOUND`` when ``username`` is not in the
        stored user map (checked before the requester's authorization),
        ``NOT_AUTHORIZED`` when the requester is not an admin,
        ``LAST_ADMIN`` when a demotion would leave no admin, and ``OK``
        when the flag was changed and saved or the user was already in
        the requested state (nothing is written in that case).
    """
    username = (username or "").strip().lower()
    requesting_user = (requesting_user or "").strip().lower()
    is_admin = bool(is_admin)
    with self._config_lock:
        target = self._config.get("users", {}).get(username)
        if target is None:
            return SetAdminResult.USER_NOT_FOUND
        if not self.users.get(requesting_user, {}).get("is_admin"):
            return SetAdminResult.NOT_AUTHORIZED
        currently_admin = bool(target.get("is_admin"))
        if currently_admin == is_admin:
            return SetAdminResult.OK  # no-op; leave privileges untouched
        if currently_admin and not is_admin:
            admin_count = sum(1 for d in self.users.values() if d.get("is_admin"))
            if admin_count <= 1:
                return SetAdminResult.LAST_ADMIN
        # Write order matters for lock-free readers: get_privileges()
        # reads without _config_lock and trusts is_admin, so the admin
        # flag must be flipped while the stored map is safe to expose —
        # before writing admin privileges on promote, after restoring
        # the pre-admin map on demote.
        if is_admin:
            target["is_admin"] = True
            # Stash the pre-admin map so a later demotion can restore it.
            # While is_admin is set the stored map is inert: get_privileges
            # short-circuits to ADMIN_PRIVILEGES and set_privileges refuses
            # admins, so only set_admin ever touches the stash.
            target["privileges_before_admin"] = dict(
                target.get("privileges") or DEFAULT_PRIVILEGES
            )
            target["privileges"] = dict(ADMIN_PRIVILEGES)
        else:
            # Restore the stashed pre-admin map. Fall back to defaults for
            # users created as admins (their stored map is ADMIN_PRIVILEGES,
            # which must not leak past demotion — e.g. can_use_bash) and
            # for admins promoted before the stash existed.
            target["privileges"] = dict(
                target.pop("privileges_before_admin", None)
                or DEFAULT_PRIVILEGES
            )
            target["is_admin"] = False
        self._save()
        logger.info("Set is_admin=%s for '%s' (by '%s')", is_admin, username, requesting_user)
        return SetAdminResult.OK
|
||||
|
||||
def change_password(self, username: str, current_password: str, new_password: str) -> bool:
|
||||
username = username.strip().lower()
|
||||
if username not in self.users:
|
||||
|
||||
+194
-25
@@ -688,6 +688,7 @@ def _migrate_add_last_message_at_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(sessions)")
|
||||
@@ -713,10 +714,14 @@ def _migrate_add_last_message_at_column():
|
||||
"ON sessions(archived, last_message_at)"
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
logging.getLogger(__name__).info("Migrated: added + backfilled 'last_message_at' on sessions")
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"last_message_at migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_document_archived_column():
|
||||
"""Add `archived` to documents (soft-archive flag). Guarded + idempotent."""
|
||||
@@ -724,6 +729,7 @@ def _migrate_add_document_archived_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(documents)")
|
||||
@@ -732,9 +738,13 @@ def _migrate_add_document_archived_column():
|
||||
conn.execute("ALTER TABLE documents ADD COLUMN archived BOOLEAN DEFAULT 0")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'archived' to documents")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"documents.archived migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_owner_column():
|
||||
@@ -743,6 +753,7 @@ def _migrate_add_owner_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(sessions)")
|
||||
@@ -752,9 +763,13 @@ def _migrate_add_owner_column():
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS ix_sessions_owner ON sessions(owner)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'owner' column to sessions")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"Migration check failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_model_endpoints():
|
||||
"""Recreate model_endpoints table if schema changed (url->base_url)."""
|
||||
@@ -762,6 +777,7 @@ def _migrate_model_endpoints():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -770,9 +786,13 @@ def _migrate_model_endpoints():
|
||||
conn.execute("DROP TABLE IF EXISTS model_endpoints")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: dropped old model_endpoints table (schema change)")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"model_endpoints migration check failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_hidden_models_column():
|
||||
"""Add hidden_models column to model_endpoints if it doesn't exist."""
|
||||
@@ -780,6 +800,7 @@ def _migrate_add_hidden_models_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -788,9 +809,13 @@ def _migrate_add_hidden_models_column():
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN hidden_models TEXT")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'hidden_models' column to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"hidden_models migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_model_endpoint_owner_column():
|
||||
"""Add owner column to model_endpoints if it doesn't exist.
|
||||
@@ -805,6 +830,7 @@ def _migrate_add_model_endpoint_owner_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -814,9 +840,13 @@ def _migrate_add_model_endpoint_owner_column():
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_owner ON model_endpoints(owner)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'owner' column + index to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"model_endpoints.owner migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_provider_auth_id_column():
|
||||
@@ -825,6 +855,7 @@ def _migrate_add_provider_auth_id_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -834,9 +865,13 @@ def _migrate_add_provider_auth_id_column():
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS ix_model_endpoints_provider_auth_id ON model_endpoints(provider_auth_id)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'provider_auth_id' column + index to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"model_endpoints.provider_auth_id migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_model_type_column():
|
||||
@@ -845,6 +880,7 @@ def _migrate_add_model_type_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -853,9 +889,13 @@ def _migrate_add_model_type_column():
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_type TEXT DEFAULT 'llm'")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'model_type' column to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"model_type migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_model_endpoint_refresh_columns():
|
||||
"""Add endpoint classification / refresh policy columns if missing."""
|
||||
@@ -863,6 +903,7 @@ def _migrate_add_model_endpoint_refresh_columns():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -876,9 +917,13 @@ def _migrate_add_model_endpoint_refresh_columns():
|
||||
if columns and "model_refresh_timeout" not in columns:
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN model_refresh_timeout INTEGER")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"model_endpoints refresh-policy migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_task_run_model_column():
|
||||
"""Add model column to task_runs if it doesn't exist (records which model ran)."""
|
||||
@@ -886,6 +931,7 @@ def _migrate_add_task_run_model_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(task_runs)")
|
||||
@@ -894,9 +940,13 @@ def _migrate_add_task_run_model_column():
|
||||
conn.execute("ALTER TABLE task_runs ADD COLUMN model TEXT")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'model' column to task_runs")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"task_runs model migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_supports_tools_column():
|
||||
"""Add supports_tools column to model_endpoints if it doesn't exist."""
|
||||
@@ -904,6 +954,7 @@ def _migrate_add_supports_tools_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -912,9 +963,13 @@ def _migrate_add_supports_tools_column():
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN supports_tools BOOLEAN")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'supports_tools' column to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"supports_tools migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_cached_models_column():
|
||||
@@ -923,6 +978,7 @@ def _migrate_add_cached_models_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -930,9 +986,13 @@ def _migrate_add_cached_models_column():
|
||||
if columns and "cached_models" not in columns:
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN cached_models TEXT")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"cached_models migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_pinned_models_column():
|
||||
"""Add pinned_models column to model_endpoints if it doesn't exist."""
|
||||
@@ -940,6 +1000,7 @@ def _migrate_add_pinned_models_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(model_endpoints)")
|
||||
@@ -948,9 +1009,13 @@ def _migrate_add_pinned_models_column():
|
||||
conn.execute("ALTER TABLE model_endpoints ADD COLUMN pinned_models TEXT")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'pinned_models' column to model_endpoints")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"pinned_models migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_notes_sort_order():
|
||||
"""Add sort_order, image_url, repeat columns to notes if they don't exist."""
|
||||
@@ -958,6 +1023,7 @@ def _migrate_add_notes_sort_order():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(notes)")
|
||||
@@ -975,9 +1041,13 @@ def _migrate_add_notes_sort_order():
|
||||
if columns and "agent_session_id" not in columns:
|
||||
conn.execute("ALTER TABLE notes ADD COLUMN agent_session_id TEXT")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"notes migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_mode_column():
|
||||
"""Add mode column to sessions table if it doesn't exist."""
|
||||
@@ -985,6 +1055,7 @@ def _migrate_add_mode_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(sessions)")
|
||||
@@ -993,9 +1064,13 @@ def _migrate_add_mode_column():
|
||||
conn.execute("ALTER TABLE sessions ADD COLUMN mode TEXT")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'mode' column to sessions")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"Migration check for mode failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_folder_column():
|
||||
"""Add folder column to sessions table if it doesn't exist."""
|
||||
@@ -1003,6 +1078,7 @@ def _migrate_add_folder_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(sessions)")
|
||||
@@ -1011,9 +1087,13 @@ def _migrate_add_folder_column():
|
||||
conn.execute("ALTER TABLE sessions ADD COLUMN folder TEXT")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'folder' column to sessions")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"Migration check for folder failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_token_columns():
|
||||
"""Add cumulative token tracking columns to sessions table."""
|
||||
@@ -1021,6 +1101,7 @@ def _migrate_add_token_columns():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(sessions)")
|
||||
@@ -1030,9 +1111,13 @@ def _migrate_add_token_columns():
|
||||
conn.execute("ALTER TABLE sessions ADD COLUMN total_output_tokens INTEGER DEFAULT 0")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added token tracking columns to sessions")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"Migration check for token columns failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_owner_to_table(table_name: str, index_name: str):
|
||||
"""Generic helper: add owner TEXT column + index to a table if missing."""
|
||||
@@ -1040,6 +1125,7 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute(f"PRAGMA table_info({table_name})")
|
||||
@@ -1049,9 +1135,13 @@ def _migrate_add_owner_to_table(table_name: str, index_name: str):
|
||||
conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name}(owner)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info(f"Migrated: added 'owner' column to {table_name}")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"Migration owner column for {table_name} failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_add_multiuser_owner_columns():
|
||||
"""Add owner column to memories, gallery_images, user_tools, comparisons."""
|
||||
@@ -1076,6 +1166,7 @@ def _migrate_add_api_token_scopes_column():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
columns = [row[1] for row in conn.execute("PRAGMA table_info(api_tokens)").fetchall()]
|
||||
@@ -1084,9 +1175,13 @@ def _migrate_add_api_token_scopes_column():
|
||||
conn.execute("UPDATE api_tokens SET scopes = 'chat' WHERE scopes IS NULL OR scopes = ''")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added scopes column to api_tokens")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"api_tokens.scopes migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _migrate_assign_legacy_owner():
|
||||
"""Assign all null-owner data to the first (admin) user.
|
||||
@@ -1128,6 +1223,7 @@ def _migrate_assign_legacy_owner():
|
||||
return
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
# Every table with an `owner` column. New tables added later will be
|
||||
@@ -1152,9 +1248,13 @@ def _migrate_assign_legacy_owner():
|
||||
except Exception as e:
|
||||
logger.warning(f"Legacy owner assignment for {table} failed: {e}")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logger.warning(f"Legacy owner migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Also migrate memory.json
|
||||
mem_path = MEMORY_FILE
|
||||
@@ -1502,6 +1602,7 @@ class CalendarCal(TimestampMixin, Base):
|
||||
# NULL for local calendars and for CalDAV calendars created before
|
||||
# multi-account support was added (treated as "use any configured account").
|
||||
account_id = Column(String, nullable=True, index=True)
|
||||
caldav_base_url = Column(String, nullable=True)
|
||||
|
||||
events = relationship("CalendarEvent", back_populates="calendar", cascade="all, delete-orphan")
|
||||
|
||||
@@ -1532,10 +1633,27 @@ class CalendarEvent(TimestampMixin, Base):
|
||||
# vanishes upstream). NULL/local = created locally (agent, email triage, or
|
||||
# a UI event whose write-back failed) and must NOT be pruned by the sync.
|
||||
origin = Column(String, nullable=True, index=True)
|
||||
remote_href = Column(String, nullable=True) # CalDAV object URL for updates/deletes
|
||||
remote_etag = Column(String, nullable=True) # Last seen CalDAV ETag, when available
|
||||
caldav_sync_pending = Column(String, nullable=True) # create | update | delete retry marker
|
||||
|
||||
calendar = relationship("CalendarCal", back_populates="events")
|
||||
|
||||
|
||||
class CalendarDeletedEvent(TimestampMixin, Base):
    """Hidden tombstone for a CalDAV delete, kept until the remote delete succeeds."""

    __tablename__ = "caldav_deleted_events"

    # Identity and ownership of the deleted event.
    uid = Column(String, primary_key=True, index=True)
    owner = Column(String, nullable=True, index=True)
    calendar_id = Column(String, nullable=True, index=True)

    # Remote CalDAV coordinates of the object to delete.
    # NOTE(review): presumably mirrors CalendarEvent.remote_href / remote_etag
    # and CalendarCal.caldav_base_url at the time of deletion — confirm against
    # the sync code.
    remote_href = Column(String, nullable=True)
    remote_etag = Column(String, nullable=True)
    caldav_base_url = Column(String, nullable=True)

    # Diagnostic context.
    # NOTE(review): last_error looks like the most recent failure message from
    # a remote delete attempt — confirm.
    summary = Column(String, nullable=True)
    last_error = Column(Text, nullable=True)
|
||||
|
||||
|
||||
class Integration(TimestampMixin, Base):
|
||||
"""An external service connection (email, RSS, webhook, etc.)."""
|
||||
__tablename__ = "integrations"
|
||||
@@ -1667,6 +1785,7 @@ def init_db():
|
||||
_migrate_add_calendar_is_utc()
|
||||
_migrate_add_calendar_origin()
|
||||
_migrate_add_calendar_account_id()
|
||||
_migrate_add_caldav_sync_columns()
|
||||
_migrate_chat_messages_fts()
|
||||
_migrate_encrypt_email_passwords()
|
||||
_migrate_encrypt_signatures()
|
||||
@@ -1773,6 +1892,7 @@ def _migrate_add_email_smtp_security():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(email_accounts)")
|
||||
@@ -1788,9 +1908,13 @@ def _migrate_add_email_smtp_security():
|
||||
)
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_encrypt_endpoint_keys():
|
||||
@@ -1891,6 +2015,7 @@ def _migrate_add_calendar_is_utc():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(calendar_events)")
|
||||
@@ -1899,9 +2024,13 @@ def _migrate_add_calendar_is_utc():
|
||||
conn.execute("ALTER TABLE calendar_events ADD COLUMN is_utc BOOLEAN DEFAULT 0 NOT NULL")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'is_utc' column to calendar_events")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"is_utc migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_calendar_origin():
|
||||
@@ -1912,6 +2041,7 @@ def _migrate_add_calendar_origin():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(calendar_events)")
|
||||
@@ -1921,9 +2051,13 @@ def _migrate_add_calendar_origin():
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS ix_calendar_events_origin ON calendar_events(origin)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'origin' column to calendar_events")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"calendar_events.origin migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_calendar_account_id():
|
||||
@@ -1933,6 +2067,7 @@ def _migrate_add_calendar_account_id():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(calendars)")
|
||||
@@ -1942,9 +2077,38 @@ def _migrate_add_calendar_account_id():
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS ix_calendars_account_id ON calendars(account_id)")
|
||||
conn.commit()
|
||||
logging.getLogger(__name__).info("Migrated: added 'account_id' column to calendars")
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"calendars.account_id migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_caldav_sync_columns():
    """Add remote CalDAV metadata used for bidirectional sync.

    Adds ``remote_href``, ``remote_etag`` and ``caldav_sync_pending`` (all
    TEXT) to ``calendar_events`` and ``caldav_base_url`` (TEXT) to
    ``calendars`` when a column is missing. A table whose
    ``PRAGMA table_info`` result is empty is skipped, and nothing happens if
    the SQLite file does not exist.

    Any failure is logged as a warning and not raised. The connection is
    always closed, including on the failure path, matching the other
    ``_migrate_*`` helpers in this module.
    """
    import sqlite3

    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return

    conn = None
    try:
        conn = sqlite3.connect(db_path)

        ev_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendar_events)").fetchall()]
        if ev_columns and "remote_href" not in ev_columns:
            conn.execute("ALTER TABLE calendar_events ADD COLUMN remote_href TEXT")
        if ev_columns and "remote_etag" not in ev_columns:
            conn.execute("ALTER TABLE calendar_events ADD COLUMN remote_etag TEXT")
        if ev_columns and "caldav_sync_pending" not in ev_columns:
            conn.execute("ALTER TABLE calendar_events ADD COLUMN caldav_sync_pending TEXT")

        cal_columns = [row[1] for row in conn.execute("PRAGMA table_info(calendars)").fetchall()]
        if cal_columns and "caldav_base_url" not in cal_columns:
            conn.execute("ALTER TABLE calendars ADD COLUMN caldav_base_url TEXT")

        conn.commit()
    except Exception as e:
        logging.getLogger(__name__).warning(f"CalDAV sync metadata migration failed: {e}")
    finally:
        # Close on every path so a failed PRAGMA/ALTER does not leak the
        # connection; conn is still None if connect() itself raised.
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass
|
||||
|
||||
|
||||
def _migrate_add_calendar_metadata():
|
||||
@@ -1953,6 +2117,7 @@ def _migrate_add_calendar_metadata():
|
||||
db_path = DATABASE_URL.replace("sqlite:///", "")
|
||||
if not os.path.exists(db_path):
|
||||
return
|
||||
conn = None
|
||||
try:
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.execute("PRAGMA table_info(calendar_events)")
|
||||
@@ -1964,9 +2129,13 @@ def _migrate_add_calendar_metadata():
|
||||
if columns and "last_pinged" not in columns:
|
||||
conn.execute("ALTER TABLE calendar_events ADD COLUMN last_pinged DATETIME")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"calendar_events migration failed: {e}")
|
||||
finally:
|
||||
try:
|
||||
conn.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def get_db():
|
||||
"""
|
||||
|
||||
+48
-13
@@ -11,14 +11,24 @@ from typing import Dict, List, Any, Optional, TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from .session_manager import SessionManager
|
||||
|
||||
# Module-level session manager reference (set at app startup)
|
||||
_session_manager: Optional["SessionManager"] = None
|
||||
# Module-level session manager singleton (single source of truth)
|
||||
_SESSION_MANAGER_INSTANCE: Optional["SessionManager"] = None
|
||||
|
||||
|
||||
def set_session_manager(manager: "SessionManager"):
|
||||
"""Set the global session manager reference."""
|
||||
global _session_manager
|
||||
_session_manager = manager
|
||||
def set_session_manager_instance(manager: "SessionManager"):
    """Install ``manager`` as the module-level SessionManager singleton.

    Rebinds ``_SESSION_MANAGER_INSTANCE``; any previously stored manager is
    replaced.
    """
    global _SESSION_MANAGER_INSTANCE
    _SESSION_MANAGER_INSTANCE = manager
|
||||
|
||||
|
||||
def get_session_manager_instance() -> Optional["SessionManager"]:
    """Return whatever is currently stored in ``_SESSION_MANAGER_INSTANCE``."""
    current = _SESSION_MANAGER_INSTANCE
    return current
|
||||
|
||||
|
||||
# Keep legacy name for backward compatibility
|
||||
set_session_manager = set_session_manager_instance
|
||||
get_session_manager = get_session_manager_instance
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -42,7 +52,17 @@ class ChatMessage:
|
||||
|
||||
@dataclass
|
||||
class Session:
|
||||
"""A chat session — pure data container."""
|
||||
"""A chat session — pure data container.
|
||||
|
||||
``.history`` is the authoritative mutable message list. Callers may
|
||||
read, append, pop, or reassign it directly — these changes take
|
||||
effect immediately. ``_history`` remains a compatibility alias that
|
||||
always resolves to the authoritative ``history`` list.
|
||||
|
||||
Each session gets its own unique history list at construction time
|
||||
(the dataclass default is never shared between instances).
|
||||
"""
|
||||
|
||||
id: str
|
||||
name: str
|
||||
endpoint_url: str
|
||||
@@ -56,24 +76,35 @@ class Session:
|
||||
message_count: int = 0
|
||||
|
||||
def __post_init__(self):
|
||||
if self.history is None:
|
||||
self.history = []
|
||||
if self.headers is None:
|
||||
self.headers = {}
|
||||
# Ensure each session gets its OWN list (not the shared dataclass default)
|
||||
if self.history is None:
|
||||
self.history = []
|
||||
|
||||
@property
def _history(self) -> List[ChatMessage]:
    """Legacy accessor: read ``history`` through its old ``_history`` name."""
    messages = self.history
    return messages

@_history.setter
def _history(self, messages: List[ChatMessage]):
    """Legacy mutator: assigning to ``_history`` rebinds ``history``."""
    self.history = messages
|
||||
|
||||
def add_message(self, message: ChatMessage):
|
||||
"""
|
||||
Add a message to this session.
|
||||
|
||||
Delegates to SessionManager for persistence if available,
|
||||
otherwise just appends to history.
|
||||
Appends to the authoritative history list and increments
|
||||
message_count. Delegates to SessionManager for persistence
|
||||
if available.
|
||||
"""
|
||||
self.history.append(message)
|
||||
self.message_count = len(self.history)
|
||||
|
||||
# Delegate to session manager for persistence
|
||||
if _session_manager:
|
||||
_session_manager._persist_message(self.id, message)
|
||||
if _SESSION_MANAGER_INSTANCE:
|
||||
_SESSION_MANAGER_INSTANCE._persist_message(self.id, message)
|
||||
|
||||
def get_context_messages(self) -> List[Dict[str, Any]]:
|
||||
"""Get messages in format for LLM API.
|
||||
@@ -94,3 +125,7 @@ class Session:
|
||||
def get(self, key: str, default=None):
    """Dict-style lookup: return attribute ``key``, or ``default`` if it is missing."""
    try:
        return getattr(self, key)
    except AttributeError:
        return default
|
||||
|
||||
def __getitem__(self, key: str):
    """Support ``session['field']`` by returning the attribute named ``key``.

    A missing attribute propagates the ``AttributeError`` from ``getattr``.
    """
    value = getattr(self, key)
    return value
|
||||
|
||||
@@ -191,6 +191,8 @@ def _windows_bash_fallbacks() -> List[str]:
|
||||
base = os.environ.get(env_name)
|
||||
if base:
|
||||
roots.append(ntpath.join(base, "Git"))
|
||||
if env_name == "LocalAppData":
|
||||
roots.append(ntpath.join(base, "Programs", "Git"))
|
||||
roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)
|
||||
|
||||
paths: List[str] = []
|
||||
@@ -298,7 +300,7 @@ def is_wsl() -> bool:
|
||||
import sys
|
||||
if sys.platform.startswith("linux") or os.name == "posix":
|
||||
try:
|
||||
with open("/proc/version", "r") as f:
|
||||
with open("/proc/version", "r", encoding="utf-8", errors="ignore") as f:
|
||||
if "microsoft" in f.read().lower():
|
||||
return True
|
||||
except Exception:
|
||||
@@ -366,6 +368,10 @@ def _ssh_exec_argv(
|
||||
strict_host_key_checking: bool | None = None,
|
||||
) -> list[str]:
|
||||
"""Build a consistent ssh argv for remote command execution."""
|
||||
remote_value = str(remote or "").strip()
|
||||
remote_host = remote_value.rsplit("@", 1)[-1]
|
||||
if not remote_value or remote_value.startswith("-") or not remote_host or remote_host.startswith("-"):
|
||||
raise ValueError("Invalid SSH remote host")
|
||||
argv = ["ssh"]
|
||||
if connect_timeout is not None:
|
||||
argv.extend(["-o", f"ConnectTimeout={int(connect_timeout)}"])
|
||||
|
||||
+45
-4
@@ -17,6 +17,9 @@ from typing import Dict, Optional
|
||||
from .database import Session as DbSession, ChatMessage as DbChatMessage, Document as DbDocument, SessionLocal, utcnow_naive
|
||||
from .models import Session, ChatMessage
|
||||
|
||||
# Re-export singleton accessors from models for convenience
|
||||
from .models import set_session_manager_instance, get_session_manager_instance
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -188,12 +191,17 @@ class SessionManager:
|
||||
"""
|
||||
Add a message to a session and persist to database.
|
||||
|
||||
Updates the authoritative history list and persists through this
|
||||
manager directly so tests and temporary managers do not depend on the
|
||||
process-wide session-manager singleton.
|
||||
|
||||
Args:
|
||||
session_id: Session ID
|
||||
message: ChatMessage to add
|
||||
"""
|
||||
session = self.get_session(session_id)
|
||||
session.history.append(message)
|
||||
session._history = session.history
|
||||
session.message_count = len(session.history)
|
||||
|
||||
self._persist_message(session_id, message)
|
||||
@@ -232,7 +240,10 @@ class SessionManager:
|
||||
)
|
||||
db.add(db_message)
|
||||
|
||||
db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
|
||||
if session_id in self.sessions:
|
||||
db_session.message_count = len(self.sessions[session_id].history)
|
||||
else:
|
||||
db_session.message_count = 0
|
||||
_now = datetime.now(timezone.utc)
|
||||
db_session.last_accessed = _now
|
||||
# Clean "last conversation" timestamp — only bumped here on a
|
||||
@@ -283,6 +294,7 @@ class SessionManager:
|
||||
|
||||
# Update in-memory
|
||||
session.history = session.history[:keep_count]
|
||||
session._history = session.history
|
||||
|
||||
logger.info(f"Truncated session {session_id} to {keep_count} messages")
|
||||
return True
|
||||
@@ -333,6 +345,7 @@ class SessionManager:
|
||||
|
||||
db.commit()
|
||||
session.history = list(messages)
|
||||
session._history = session.history
|
||||
session.message_count = len(messages)
|
||||
logger.info("Replaced session %s history with %d messages", session_id, len(messages))
|
||||
return True
|
||||
@@ -608,24 +621,52 @@ class SessionManager:
|
||||
def save_sessions(self):
|
||||
"""No-op for DB compatibility."""
|
||||
|
||||
def ensure_task_session(self, session_id: str, name: str, endpoint_url: str, model: str, owner: str = None, task: object = None) -> Session:
    """Return the cached session for ``session_id``, creating it on a miss.

    A session already present in ``self.sessions`` is returned as-is: it is
    not overwritten and ``task`` is left untouched. Otherwise the session is
    built with ``create_session`` and, when ``task`` is given, its
    ``session_id`` attribute is set to ``session_id``.
    """
    cache = self.sessions
    if session_id not in cache:
        created = self.create_session(session_id, name, endpoint_url, model, owner=owner)
        if task is not None:
            task.session_id = session_id
        return created
    return cache[session_id]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Cleanup
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def cleanup_empty_sessions(self, auto_archive_days: int = 30) -> dict:
|
||||
"""Clean up empty and old sessions."""
|
||||
def cleanup_empty_sessions(self, auto_archive_days: int = 30, min_age_hours: int = 1) -> dict:
|
||||
"""Clean up empty and old sessions.
|
||||
|
||||
Args:
|
||||
auto_archive_days: Age in days before non-important sessions are archived.
|
||||
min_age_hours: Minimum age in hours before an empty session can be deleted.
|
||||
Prevents deleting sessions that were just created.
|
||||
"""
|
||||
db = SessionLocal()
|
||||
stats = {'deleted_empty': 0, 'archived_old': 0, 'total_checked': 0}
|
||||
|
||||
try:
|
||||
all_sessions = db.query(DbSession).all()
|
||||
cutoff_date = utcnow_naive() - timedelta(days=auto_archive_days)
|
||||
min_age = utcnow_naive() - timedelta(hours=min_age_hours)
|
||||
|
||||
for db_session in all_sessions:
|
||||
stats['total_checked'] += 1
|
||||
|
||||
# Delete empty sessions
|
||||
# Delete empty sessions only if older than min_age_hours
|
||||
if db_session.message_count == 0:
|
||||
if db_session.created_at is not None:
|
||||
created = db_session.created_at
|
||||
if created.tzinfo is None:
|
||||
created = created.replace(tzinfo=timezone.utc)
|
||||
if created > min_age:
|
||||
continue # Too young to delete
|
||||
if db_session.id in self.sessions:
|
||||
del self.sessions[db_session.id]
|
||||
db.delete(db_session)
|
||||
|
||||
@@ -16,18 +16,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
@@ -15,18 +15,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
+5
-5
@@ -4,18 +4,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
@@ -0,0 +1,194 @@
|
||||
# Agent migration manifests
|
||||
|
||||
Odysseus should be able to learn from another agent without blindly trusting
|
||||
that agent's whole state. The safe migration path is:
|
||||
|
||||
```text
|
||||
source agent export -> source adapter -> agent-migration.v1 manifest -> preview -> apply
|
||||
```
|
||||
|
||||
The manifest is intentionally source-neutral. OpenClaw, Hermes, a folder of
|
||||
Markdown notes, or any other agent can have its own adapter, but Odysseus only
|
||||
needs to understand the normalized manifest.
|
||||
|
||||
## Why not import everything as memory?
|
||||
|
||||
Durable memory should stay compact and useful. Long notes, logs, session
|
||||
transcripts, and project archives are useful context, but they are not all
|
||||
memories. A good migration keeps two layers separate:
|
||||
|
||||
- **Archive documents** preserve source material for search, reading, and later
|
||||
extraction.
|
||||
- **Memory candidates** are short facts or preferences that can be reviewed
|
||||
before being saved into Odysseus memory.
|
||||
|
||||
This keeps Odysseus' existing memory-review flow intact while giving it better
|
||||
source material to review.
|
||||
|
||||
## Manifest shape
|
||||
|
||||
`agent-migration.v1` is a JSON object:
|
||||
|
||||
```json
|
||||
{
|
||||
"schema_version": "agent-migration.v1",
|
||||
"generated_at": "2026-06-06T00:00:00Z",
|
||||
"source": {
|
||||
"name": "example-agent",
|
||||
"kind": "generic"
|
||||
},
|
||||
"summary": {
|
||||
"item_count": 4,
|
||||
"counts_by_kind": {
|
||||
"memory": 1,
|
||||
"skill": 1,
|
||||
"conversation_thread": 1,
|
||||
"archive_document": 1
|
||||
},
|
||||
"warning_count": 0
|
||||
},
|
||||
"items": [],
|
||||
"warnings": []
|
||||
}
|
||||
```
|
||||
|
||||
Each item has a stable `id`, a `kind`, source metadata, and enough content for a
|
||||
future importer to preview it before applying.
|
||||
|
||||
Supported item kinds in the first pass:
|
||||
|
||||
- `memory` — a candidate memory with `text`, `category`, `source`, and
|
||||
provenance metadata.
|
||||
- `skill` — a `SKILL.md` file with content and parsed frontmatter metadata.
|
||||
- `conversation_thread` — a normalized transcript thread from an exported chat
|
||||
history. Message content is optional; adapters can preserve only thread
|
||||
metadata, message counts, timestamps, and hashes when a manifest should stay
|
||||
small or avoid embedding private transcript text.
|
||||
- `archive_document` — long-form source material. Content is optional; adapters
|
||||
can preserve only path/hash/size metadata when a manifest should stay small.
|
||||
|
||||
## Build a manifest
|
||||
|
||||
Use the read-only helper:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name old-agent \
|
||||
--source-kind generic \
|
||||
--memory-json /path/to/memories.json \
|
||||
--skills-dir /path/to/skills \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--archive /path/to/notes \
|
||||
--output /tmp/agent-migration.json
|
||||
```
|
||||
|
||||
The helper does not write to `data/`, call an LLM, import Odysseus modules, or
|
||||
modify the source. It only writes JSON.
|
||||
|
||||
Memory JSON may be:
|
||||
|
||||
```json
|
||||
[
|
||||
"A plain memory string",
|
||||
{
|
||||
"text": "A categorized memory",
|
||||
"category": "preference",
|
||||
"source": "old-agent"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
or an object containing a list under `memories`, `memory`, `items`, or `data`.
|
||||
|
||||
Skills are scanned recursively for `SKILL.md`:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name hermes \
|
||||
--source-kind hermes \
|
||||
--skills-dir ~/.hermes/skills \
|
||||
--output /tmp/hermes-skills-manifest.json
|
||||
```
|
||||
|
||||
Archive documents are metadata-only by default. To embed text content:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name notes-export \
|
||||
--archive /path/to/markdown-notes \
|
||||
--include-archive-content \
|
||||
--output /tmp/notes-manifest.json
|
||||
```
|
||||
|
||||
Conversation exports are also metadata-only by default:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name chatgpt-export \
|
||||
--source-kind chatgpt \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--output /tmp/chatgpt-conversations-manifest.json
|
||||
```
|
||||
|
||||
The first pass supports generic conversation JSON such as:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "thread-1",
|
||||
"title": "Project plan",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Can we design this?"},
|
||||
{"role": "assistant", "content": "Yes, start with a narrow slice."}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
It also recognizes ChatGPT-style `mapping` exports from `conversations.json`.
|
||||
To embed normalized messages:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name chatgpt-export \
|
||||
--source-kind chatgpt \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--include-conversation-content \
|
||||
--max-conversation-messages 2000 \
|
||||
--output /tmp/chatgpt-conversations-with-content.json
|
||||
```
|
||||
|
||||
Content embedding is explicit because exported chat histories can be huge and
|
||||
private. A future source-specific adapter can add ZIP traversal, attachment
|
||||
metadata, and provider-specific project/workspace fields while still emitting
|
||||
the same `conversation_thread` manifest item.
|
||||
|
||||
## Recommended apply behavior
|
||||
|
||||
A future Odysseus importer should treat the manifest as untrusted user-provided
|
||||
data and apply it in stages:
|
||||
|
||||
1. Show a dry-run summary with counts, warnings, duplicates, and sample items.
|
||||
2. Back up current `data/` state before writing anything.
|
||||
3. Import archive documents as documents or another searchable source, not as
|
||||
memory.
|
||||
4. Import conversation threads as searchable archived context first, with
|
||||
citations back to the source thread. Do not turn whole transcripts into
|
||||
memory.
|
||||
5. Show memory candidates for review before saving through the normal memory
|
||||
path.
|
||||
6. Import skills only after name/category conflict checks.
|
||||
7. Skip secrets by default. Credentials need explicit, provider-specific flows.
|
||||
|
||||
## What belongs in source adapters?
|
||||
|
||||
Adapters can be source-specific. The core manifest should not be.
|
||||
|
||||
For example, an OpenClaw adapter may know about OpenClaw's workspace files. A
|
||||
Hermes adapter may know about `~/.hermes/config.yaml` and `~/.hermes/skills`.
|
||||
A ChatGPT adapter may know about `conversations.json`, uploaded-file metadata,
|
||||
and image attachment directories. A Claude adapter may know about Claude's
|
||||
export shape and project boundaries. A generic adapter may only know about
|
||||
memory JSON, conversation JSON, `SKILL.md`, and Markdown folders.
|
||||
|
||||
Nonstandard folders should be adapter details, not required Odysseus concepts.
|
||||
@@ -0,0 +1,129 @@
|
||||
# Backup & Restore
|
||||
|
||||
Odysseus keeps all of your state in the `data/` directory — the SQLite database
|
||||
(`app.db`), the Fernet encryption key (`data/.app_key`), the vault, memory, RAG
|
||||
indexes, personal documents, and uploads. The `scripts/odysseus-backup` tool
|
||||
snapshots that directory into a single gzip tarball and restores it later.
|
||||
|
||||
Snapshots are safe to take while the app is running: SQLite databases are copied
|
||||
through SQLite's own `.backup` API rather than a raw file copy, so an in-flight
|
||||
write can't corrupt the snapshot.
|
||||
|
||||
> **A snapshot contains your secrets.** The tarball includes the Fernet
|
||||
> encryption key (`data/.app_key`), the vault, sessions, and any stored
|
||||
> provider/API tokens — so treat it like a password. Store backups somewhere
|
||||
> private, never commit them to Git, and prefer an encrypted destination when
|
||||
> copying them offsite.
|
||||
|
||||
## Quick start
|
||||
|
||||
Run the tool from the repository root:
|
||||
|
||||
```bash
|
||||
# Create a snapshot → backups/odysseus-backup-<YYYYMMDD-HHMMSS>.tar.gz
|
||||
./scripts/odysseus-backup snapshot
|
||||
|
||||
# List existing snapshots (most recent first)
|
||||
./scripts/odysseus-backup list
|
||||
|
||||
# Check a tarball's integrity without extracting it
|
||||
./scripts/odysseus-backup verify backups/odysseus-backup-20260101-120000.tar.gz
|
||||
|
||||
# Restore (destructive — see the warning below)
|
||||
./scripts/odysseus-backup restore backups/odysseus-backup-20260101-120000.tar.gz --yes
|
||||
```
|
||||
|
||||
The script depends only on the Python standard library, so any `python3` on your
|
||||
`PATH` will run it — you don't need the app's virtualenv active.
|
||||
|
||||
Every command prints a JSON result. Add `--pretty` for indented output.
|
||||
|
||||
## Commands
|
||||
|
||||
### `snapshot`
|
||||
|
||||
Writes a `tar.gz` of `data/` to `backups/odysseus-backup-<timestamp>.tar.gz`.
|
||||
|
||||
| Flag | Effect |
|
||||
| --- | --- |
|
||||
| `--out PATH` | Write to a specific path instead of the default `backups/` location. Must be **outside** `data/`. |
|
||||
| `--include-research` | Include `data/deep_research/` (skipped by default — research runs are large). |
|
||||
| `--include-attachments` | Include `data/mail-attachments/` (skipped by default — cached IMAP extractions, re-derivable). |
|
||||
|
||||
By default the snapshot includes everything under `data/` **except**
|
||||
`deep_research/` and `mail-attachments/`. Personal uploads and documents are
|
||||
included.
|
||||
|
||||
```bash
|
||||
# Snapshot straight to a mounted NAS path
|
||||
./scripts/odysseus-backup snapshot --out /mnt/nas/odysseus-$(date +%F).tar.gz
|
||||
|
||||
# Full snapshot including research runs and mail attachments
|
||||
./scripts/odysseus-backup snapshot --include-research --include-attachments
|
||||
```
|
||||
|
||||
### `list`
|
||||
|
||||
Lists the tarballs in `backups/`, most recent first, with size and modification
|
||||
time.
|
||||
|
||||
### `verify PATH`
|
||||
|
||||
Opens the tarball read-only and walks every member to confirm it is intact and
|
||||
safe to restore. Nothing is extracted. Use this before relying on an old backup
|
||||
or after copying one across machines.
|
||||
|
||||
### `restore PATH --yes`
|
||||
|
||||
Overwrites `data/` from a tarball.
|
||||
|
||||
> **Restore is destructive.** It replaces the current `data/` directory. `--yes`
|
||||
> is required so a mistyped command can't wipe your live state.
|
||||
|
||||
Restore is not a blind delete: before extracting, the tool **renames your current
|
||||
`data/` to `data.before-restore-<timestamp>`** in the repository root. If a
|
||||
restore turns out to be wrong, your previous state is still there — delete the
|
||||
restored `data/` and rename the stashed directory back. The restore path is also
|
||||
validated entry-by-entry: archives containing absolute paths, `..` segments,
|
||||
symlinks, or anything outside `data/` are rejected.
|
||||
|
||||
## Scheduling offsite backups
|
||||
|
||||
The tarball output composes cleanly with cron and any copy tool. For example, a
|
||||
nightly snapshot copied offsite:
|
||||
|
||||
```cron
|
||||
0 3 * * * cd /path/to/odysseus && ./scripts/odysseus-backup snapshot --out "/mnt/nas/odysseus-$(date +\%F).tar.gz"
|
||||
```
|
||||
|
||||
Swap the `--out` target for `scp`, `rclone`, `s3cmd`, or similar to push the
|
||||
snapshot to remote storage.
|
||||
|
||||
## Docker vs native installs
|
||||
|
||||
The tool reads `data/` and writes `backups/` relative to the repository root, so
|
||||
where you run it matters:
|
||||
|
||||
- **Native installs** — run it from the repo root as shown above. `data/` and
|
||||
`backups/` are both in the repo directory.
|
||||
- **Docker** — `docker-compose.yml` bind-mounts the host's `./data` to
|
||||
`/app/data`, so the live data is also present on the host. **Run the tool on
|
||||
the host** from the repo root; the snapshot reads the bind-mounted `./data` and
|
||||
writes to `./backups` on the host. Running it *inside* the container is not
|
||||
recommended, because `backups/` is not a mounted volume and the tarball would
|
||||
be lost when the container is recreated.
|
||||
|
||||
> **ChromaDB caveat (Docker only).** In the Docker setup, ChromaDB stores its
|
||||
> vectors in a separate Compose-managed volume (declared as `chromadb-data`),
|
||||
> **not** under `./data`. `odysseus-backup` therefore does not capture the Docker
|
||||
> ChromaDB store. Back it up separately if you need it. Compose prefixes the
|
||||
> volume with the project name, so find the real name first
|
||||
> (`docker volume ls | grep chromadb`), then archive it — for example:
|
||||
>
|
||||
> ```bash
|
||||
> docker run --rm -v <project>_chromadb-data:/data -v "$PWD":/backup \
|
||||
> alpine tar czf /backup/chromadb.tar.gz -C /data .
|
||||
> ```
|
||||
>
|
||||
> On native installs ChromaDB lives at `data/chroma/` and is included in the
|
||||
> snapshot normally.
|
||||
+10
-3
@@ -25,9 +25,16 @@
|
||||
--radius: 8px;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; }
|
||||
/* Each section is a full-viewport "page" with its content centered, so only
|
||||
one shows at a time and the snap is obvious. */
|
||||
html { scroll-behavior: smooth; scroll-padding-top: 60px; }
|
||||
/* REMOVED: "scroll-snap-type: y proximity"
|
||||
The idea was: >>Each section is a full-viewport "page" with its content centered,
|
||||
so only one shows at a time and the snap is obvious.<<
|
||||
|
||||
PROBLEM: sections easily grow taller than 100vh IRL
|
||||
This causes forced jumps mid-read, which is intrusive UX.
|
||||
The landing-page is not a PowerPoint presentation!
|
||||
|
||||
Preserved: the scroll-snap-align declarations below, which are inert without scroll-snap-type. */
|
||||
.hero, section {
|
||||
scroll-snap-align: start; min-height: 100vh;
|
||||
display: flex; flex-direction: column; justify-content: center;
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
# Security CI guide
|
||||
|
||||
This project runs a set of automated security checks on every pull request and
|
||||
on every push to `main`. This page explains what each one does, whether it can
|
||||
block a merge, and the few one-time settings you should turn on to get the full
|
||||
benefit.
|
||||
|
||||
## What runs, and why
|
||||
|
||||
Each check lives in its own file under `.github/workflows/`. They run
|
||||
automatically; you do not start them.
|
||||
|
||||
| Check | What it protects against | Blocks a merge? |
|
||||
|---|---|---|
|
||||
| **Secret scan** (gitleaks) | An API key, token, or password being committed by mistake or on purpose | Yes |
|
||||
| **Workflow security** (actionlint + zizmor) | A broken or insecure automation file that could leak the repo's access token | Yes |
|
||||
| **Dependency review** | A pull request that adds a software library with a known security hole | Yes |
|
||||
| **pip-audit** | Known security holes in the Python libraries already used | No (advisory) |
|
||||
| **Container scan: hadolint** | Mistakes and insecure patterns in the `Dockerfile` | Yes |
|
||||
| **Container scan: Trivy** | Known security holes in the Docker image | No (advisory) |
|
||||
| **CodeQL** | Real bugs in the app's own code: injection, auth mistakes, path traversal | No (advisory) |
|
||||
|
||||
"Blocks a merge" means a red X appears on the pull request and, once you enable
|
||||
the setting below, the **Merge** button is disabled until it is fixed.
|
||||
|
||||
"Advisory" means it reports problems into the repository's **Security** tab so
|
||||
you can review them on your own schedule, but it never stops a merge. These are
|
||||
advisory on purpose: they often flag long-standing issues in other people's
|
||||
libraries, not something a given pull request introduced.
|
||||
|
||||
## Where results appear
|
||||
|
||||
- **Checks tab of a pull request**: the pass/fail of each check. A green tick is
|
||||
good; a red X needs attention.
|
||||
- **Security tab of the repository**: detailed findings from the advisory
|
||||
scanners (Trivy and CodeQL). This is your dashboard.
|
||||
|
||||
## If a check fails
|
||||
|
||||
- **Secret scan failed**: a real credential may have been committed. Treat it as
|
||||
leaked: rotate (regenerate) that key or token immediately, then remove it from
|
||||
the file. Do not just delete the commit; assume it was seen.
|
||||
- **Dependency review failed**: the pull request adds a library with a known
|
||||
vulnerability. Ask the contributor to use a patched version, or decline the
|
||||
change.
|
||||
- **hadolint / workflow security failed**: the contributor changed the
|
||||
`Dockerfile` or an automation file in a way the linter rejects. Ask them to
|
||||
address the message shown in the failed check.
|
||||
|
||||
## One-time settings to turn on
|
||||
|
||||
These two settings unlock the full value. You only do them once.
|
||||
|
||||
### 1. Require the blocking checks before merging
|
||||
|
||||
This makes the **Merge** button refuse to work until the gating checks pass.
|
||||
|
||||
1. Go to the repository on GitHub.
|
||||
2. Click **Settings** (top right of the repo).
|
||||
3. In the left sidebar, click **Branches**.
|
||||
4. Under **Branch protection rules**, click **Add branch ruleset** (or **Add
|
||||
rule**), and set the branch name pattern to `dev` (this is the branch all
|
||||
pull requests target; `main` is fast-forwarded at releases).
|
||||
5. Enable **Require status checks to pass before merging**.
|
||||
6. In the search box that appears, add these checks by name:
|
||||
- `Python syntax (compileall)`
|
||||
- `JS syntax (node --check)`
|
||||
- `gitleaks`
|
||||
- `actionlint`
|
||||
- `zizmor (Actions SAST)`
|
||||
- `hadolint (Dockerfile lint)`
|
||||
- `dependency-review (PR gate)`
|
||||
|
||||
The first two come from the correctness CI (`ci.yml`); the rest are this
|
||||
security suite. Leave pytest, pip-audit, Trivy, and CodeQL unchecked so they
|
||||
stay advisory.
|
||||
7. Also enable **Require a pull request before merging** and **Require review
|
||||
from Code Owners** (this uses the `.github/CODEOWNERS` file so every change
|
||||
needs a listed owner's sign-off; while that file lists no owners, the setting has no effect).
|
||||
8. Click **Create** / **Save changes**.
|
||||
|
||||
Note: a check name only appears in the list after it has run at least once, so
|
||||
let the workflows run on one pull request first, then add them here.
|
||||
|
||||
### 2. Turn on the Security tab features
|
||||
|
||||
1. **Settings -> Code security** (or **Code security and analysis**).
|
||||
2. Turn on **Dependency graph** (usually on by default for public repos) -- this
|
||||
powers Dependency review and Dependabot.
|
||||
3. Turn on **Dependabot alerts** and **Dependabot security updates**.
|
||||
4. Under **Code scanning**, you have two ways to scan the app code with CodeQL:
|
||||
- The included `codeql.yml` workflow already scans `main` and runs weekly.
|
||||
- To also scan **pull requests** (recommended, since most contributions come
|
||||
from forks), click **Set up -> Default** under Code scanning. GitHub then
|
||||
runs CodeQL on pull requests for you, with no token limitations.
|
||||
|
||||
## Keeping it current
|
||||
|
||||
`.github/dependabot.yml` opens small weekly pull requests to update Python and
|
||||
npm packages, the Docker base image, and the pinned automation actions
|
||||
themselves. Review and merge those like any other pull request; they keep the
|
||||
project patched without manual tracking.
|
||||
+28
-3
@@ -30,14 +30,26 @@ function Fail($msg) {
|
||||
exit 1
|
||||
}
|
||||
|
||||
function Test-WindowsBashStub($path) {
    # Returns $true when $path contains one of the known Windows bash.exe
    # locations (System32, Sysnative, WindowsApps), compared case-insensitively.
    # An empty or missing path is never treated as a stub.
    if (-not $path) { return $false }

    $candidate = $path.ToLowerInvariant()
    $stubTails = @("system32\bash.exe", "sysnative\bash.exe", "windowsapps\bash.exe")

    # Collect every stub fragment found in the lowered path; any hit is enough.
    $hits = @($stubTails | Where-Object { $candidate.Contains($_) })
    return ($hits.Count -gt 0)
}
|
||||
|
||||
function Find-GitBash {
|
||||
$cmd = Get-Command bash -ErrorAction SilentlyContinue
|
||||
if ($cmd) { return $cmd.Source }
|
||||
if ($cmd -and -not (Test-WindowsBashStub $cmd.Source)) { return $cmd.Source }
|
||||
|
||||
$roots = @()
|
||||
foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
|
||||
$base = [Environment]::GetEnvironmentVariable($name)
|
||||
if ($base) { $roots += (Join-Path $base "Git") }
|
||||
if ($base) {
|
||||
$roots += (Join-Path $base "Git")
|
||||
if ($name -eq "LocalAppData") { $roots += (Join-Path $base "Programs\Git") }
|
||||
}
|
||||
}
|
||||
$roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")
|
||||
|
||||
@@ -129,7 +141,20 @@ if (-not (Find-GitBash)) {
|
||||
Write-Host " https://git-scm.com/download/win" -ForegroundColor Yellow
|
||||
}
|
||||
|
||||
# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
|
||||
# 6. Point CUDA_PATH at a real CUDA toolkit so GPU llama-cpp-python can import.
|
||||
$cudaBase = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
|
||||
if (Test-Path $cudaBase) {
|
||||
$cudaBest = Get-ChildItem $cudaBase -Directory -ErrorAction SilentlyContinue |
|
||||
Where-Object { Test-Path (Join-Path $_.FullName "bin") } |
|
||||
Sort-Object { try { [version]($_.Name -replace "^v", "") } catch { [version]"0.0" } } -Descending |
|
||||
Select-Object -First 1
|
||||
if ($cudaBest) {
|
||||
$env:CUDA_PATH = $cudaBest.FullName
|
||||
Write-Host ("Using CUDA_PATH = " + $cudaBest.FullName) -ForegroundColor Cyan
|
||||
}
|
||||
}
|
||||
|
||||
# 7. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
|
||||
Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port)
|
||||
Write-Host "Press Ctrl+C to stop."
|
||||
Write-Host ""
|
||||
|
||||
@@ -93,16 +93,15 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
if category_filter:
|
||||
msg += f" in category '{category_filter}'"
|
||||
return [TextContent(type="text", text=msg + ".")]
|
||||
|
||||
lines = [f"Found {len(memories)} memory entries:\n"]
|
||||
for m in memories[:100]:
|
||||
for m in memories:
|
||||
cat = m.get("category", "fact")
|
||||
mid = m.get("id", "?")[:8]
|
||||
text = m.get("text", "")
|
||||
if len(text) > 150:
|
||||
text = text[:150] + "..."
|
||||
lines.append(f"- [{cat}] `{mid}` — {text}")
|
||||
if len(memories) > 100:
|
||||
lines.append(f"... and {len(memories) - 100} more")
|
||||
return [TextContent(type="text", text="\n".join(lines))]
|
||||
|
||||
elif action == "add":
|
||||
|
||||
Generated
+12
-9
@@ -5,16 +5,16 @@
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.98.0"
|
||||
"@anthropic-ai/sdk": "^0.104.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@antithesishq/bombadil": "^0.3.2"
|
||||
"@antithesishq/bombadil": "^0.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.98.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.98.0.tgz",
|
||||
"integrity": "sha512-N7aXtCvC5g6T1Y4V29lJjceu/zTkVkIZF0jdBvagr0TRFHuKeImffalGWEfqZKrvjH+IQbzJWw6TmSmUzrlMgg==",
|
||||
"version": "0.104.1",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
|
||||
"integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"json-schema-to-ts": "^3.1.1",
|
||||
@@ -33,11 +33,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@antithesishq/bombadil": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.3.2.tgz",
|
||||
"integrity": "sha512-ATy1w9ZY5gbny1H8DFc7rxZitT7DLLLFDiGcRZe+8TQiUrV5tLO+IJGOVNNLp3RpCqjZqSsxGiKoQsx31ipV1g==",
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
|
||||
"integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"bombadil": "bin/bombadil.js"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
"version": "7.29.7",
|
||||
|
||||
+2
-2
@@ -4,9 +4,9 @@
|
||||
"url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@antithesishq/bombadil": "^0.3.2"
|
||||
"@antithesishq/bombadil": "^0.5.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.98.0"
|
||||
"@anthropic-ai/sdk": "^0.104.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,4 +15,8 @@ markers = [
|
||||
"area_helpers: self-tests for the shared test helpers in tests/helpers/",
|
||||
"area_unit: pure parser / utility tests that do not clearly belong elsewhere",
|
||||
"area_uncategorized: tests not yet matched by the taxonomy (fallback)",
|
||||
# Fast-lane marker (issue #3443). Opt-in and orthogonal to the area_*/sub_*
|
||||
# taxonomy. The fast lane runs `not slow`; mark a test slow only with
|
||||
# duration evidence (see tests/run_focus.py --durations and tests/README.md).
|
||||
"slow: opt-in marker for known-slow tests; excluded by the fast lane (not slow)",
|
||||
]
|
||||
|
||||
@@ -15,7 +15,7 @@ faster-whisper
|
||||
# DuckDuckGo as a search provider option.
|
||||
# Install if you want DDG in the search-provider dropdown.
|
||||
# Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
|
||||
duckduckgo-search
|
||||
ddgs
|
||||
|
||||
# PDF form-filling feature (fillable AcroForm detection, field extraction,
|
||||
# value/annotation/signature stamping, page rendering for the form overlay).
|
||||
@@ -33,4 +33,4 @@ PyMuPDF
|
||||
# magika (onnxruntime), already a core dep via fastembed. We avoid the
|
||||
# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
|
||||
# the dependency-age discussion in issue #485.
|
||||
markitdown[docx,pptx,xlsx,xls]==0.1.5
|
||||
markitdown[docx,pptx,xlsx,xls]==0.1.6
|
||||
|
||||
+6
-2
@@ -3,8 +3,8 @@ uvicorn
|
||||
python-multipart
|
||||
python-dotenv
|
||||
httpx
|
||||
pydantic>=2.0
|
||||
pydantic-settings>=2.0
|
||||
pydantic>=2.13.4
|
||||
pydantic-settings>=2.14.1
|
||||
SQLAlchemy
|
||||
pypdf
|
||||
beautifulsoup4
|
||||
@@ -43,3 +43,7 @@ qrcode[pil]
|
||||
croniter
|
||||
pytest
|
||||
pytest-asyncio
|
||||
# starlette.testclient prefers httpx2 since Starlette 1.2.0 and warns on every
|
||||
# TestClient import when only classic httpx is present. Runtime code keeps
|
||||
# using `httpx` above; this is test-client only.
|
||||
httpx2
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
import re
|
||||
|
||||
from fastapi import HTTPException
|
||||
|
||||
|
||||
_REMOTE_HOST_RE = re.compile(
|
||||
r"^(?:[A-Za-z0-9][A-Za-z0-9._-]*@)?[A-Za-z0-9][A-Za-z0-9._-]*$"
|
||||
)
|
||||
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
||||
|
||||
|
||||
def validate_remote_host(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if not _REMOTE_HOST_RE.match(v):
|
||||
raise HTTPException(
|
||||
400,
|
||||
"Invalid remote_host — must be host or user@host, no SSH option syntax",
|
||||
)
|
||||
return v
|
||||
|
||||
|
||||
def validate_ssh_port(v: str | None) -> str | None:
    """Normalise an optional SSH port to its canonical decimal string.

    Args:
        v: The submitted value. ``None`` and ``""`` both mean "not set".

    Returns:
        ``None`` when no value was supplied, otherwise ``str(int(v))``, so
        leading zeros are dropped.

    Raises:
        HTTPException: 400 when ``v`` is not one to five digits or lies
            outside 1..65535.
    """
    if v is None or v == "":
        return None
    # A value that fails the digit pattern is mapped to 0, which the range
    # check below rejects with the same error as an out-of-range port.
    number = int(v) if _SSH_PORT_RE.fullmatch(str(v)) else 0
    if not 1 <= number <= 65535:
        raise HTTPException(400, "Invalid ssh_port")
    return str(number)
|
||||
@@ -68,6 +68,7 @@ def _normalize_scopes(scopes: str | list[str] | None = None, profile: str | None
|
||||
ensure_before("calendar:write", "calendar:read")
|
||||
ensure_before("memory:write", "memory:read")
|
||||
ensure_before("email:draft", "email:read")
|
||||
ensure_before("cookbook:launch", "cookbook:read")
|
||||
|
||||
return normalized or [DEFAULT_SCOPES]
|
||||
|
||||
@@ -154,6 +155,7 @@ def setup_api_token_routes() -> APIRouter:
|
||||
@router.patch("/tokens/{token_id}")
|
||||
async def update_token(request: Request, token_id: str):
|
||||
require_admin(request)
|
||||
current_user = get_current_user(request)
|
||||
try:
|
||||
payload = await request.json()
|
||||
except Exception:
|
||||
@@ -162,6 +164,8 @@ def setup_api_token_routes() -> APIRouter:
|
||||
token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
|
||||
if not token:
|
||||
raise HTTPException(404, "Token not found")
|
||||
if current_user and token.owner != current_user:
|
||||
raise HTTPException(403, "Not your token")
|
||||
if isinstance(payload.get("name"), str) and payload["name"].strip():
|
||||
token.name = payload["name"].strip()[:MAX_NAME_LEN]
|
||||
# Only touch scopes when the caller actually sent them. A partial
|
||||
@@ -189,10 +193,14 @@ def setup_api_token_routes() -> APIRouter:
|
||||
@router.delete("/tokens/{token_id}")
|
||||
def delete_token(request: Request, token_id: str):
|
||||
require_admin(request)
|
||||
current_user = get_current_user(request)
|
||||
with get_db_session() as db:
|
||||
deleted = db.query(ApiToken).filter(ApiToken.id == token_id).delete()
|
||||
if not deleted:
|
||||
token = db.query(ApiToken).filter(ApiToken.id == token_id).first()
|
||||
if not token:
|
||||
raise HTTPException(404, "Token not found")
|
||||
if current_user and token.owner != current_user:
|
||||
raise HTTPException(403, "Not your token")
|
||||
db.delete(token)
|
||||
_invalidate_cache(request)
|
||||
return {"status": "deleted"}
|
||||
|
||||
|
||||
+193
-13
@@ -7,7 +7,13 @@ import asyncio
|
||||
import logging
|
||||
import os
|
||||
|
||||
from core.auth import AuthManager
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from core.atomic_io import atomic_write_json, atomic_write_text
|
||||
from core.auth import AuthManager, SetAdminResult
|
||||
from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
|
||||
from src.rate_limiter import RateLimiter
|
||||
from src.settings_scrub import scrub_settings
|
||||
from src.settings import (
|
||||
@@ -67,6 +73,11 @@ class DeleteUserRequest(BaseModel):
|
||||
class RenameUserRequest(BaseModel):
|
||||
username: str
|
||||
|
||||
|
||||
class SetAdminRequest(BaseModel):
|
||||
is_admin: bool
|
||||
|
||||
|
||||
class SetOpenRegistrationRequest(BaseModel):
|
||||
enabled: bool
|
||||
|
||||
@@ -291,9 +302,30 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
if new_username in auth_manager.users:
|
||||
raise HTTPException(409, "Username already taken")
|
||||
|
||||
# Gate on auth first. Every mutation below is contingent on this
|
||||
# succeeding — doing it last meant a rejected rename (e.g. reserved
|
||||
# username) left file-backed owner fields already rewritten with no
|
||||
# way to roll them back.
|
||||
ok = auth_manager.rename_user(old_username, new_username, user)
|
||||
if not ok:
|
||||
raise HTTPException(400, "Cannot rename user")
|
||||
|
||||
def _rollback_auth_rename() -> bool:
    """Undo the auth-store rename and report whether the undo succeeded.

    Renames ``new_username`` back to ``old_username``. Never raises: a
    failing rollback is logged and reported as ``False``.
    """
    # After a self-rename the acting session already belongs to the new
    # username, so the reverse rename has to be performed as that user.
    if user == old_username:
        rollback_user = new_username
    else:
        rollback_user = user
    try:
        restored = bool(
            auth_manager.rename_user(new_username, old_username, rollback_user)
        )
    except Exception as rollback_err:
        logger.error(
            "Failed to roll back auth rename %s -> %s after owner migration failure: %s",
            new_username, old_username, rollback_err,
        )
        return False
    return restored
|
||||
|
||||
# Usernames are ownership keys for user data. Rename the common
|
||||
# owner-scoped DB rows before changing auth so the account keeps
|
||||
# access to its sessions, docs, email accounts, tasks, etc.
|
||||
# owner-scoped DB rows so the account keeps access to its sessions,
|
||||
# docs, email accounts, tasks, etc.
|
||||
try:
|
||||
from sqlalchemy import func
|
||||
from core.database import Base, SessionLocal
|
||||
@@ -316,6 +348,11 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
db.close()
|
||||
except Exception as e:
|
||||
logger.error("Failed to rename owner references %s -> %s: %s", old_username, new_username, e)
|
||||
if not _rollback_auth_rename():
|
||||
logger.error(
|
||||
"Auth rename %s -> %s could not be rolled back after owner migration failure",
|
||||
old_username, new_username,
|
||||
)
|
||||
raise HTTPException(500, "Failed to rename user data")
|
||||
|
||||
# Per-user prefs are JSON-backed, not SQL-backed.
|
||||
@@ -335,9 +372,116 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
ok = auth_manager.rename_user(old_username, new_username, user)
|
||||
if not ok:
|
||||
raise HTTPException(400, "Cannot rename user")
|
||||
# In-flight deep-research tasks live in the process-local
|
||||
# ResearchHandler registry. They are not covered by the persisted JSON
|
||||
# migration above, but the research routes filter and cancel by this
|
||||
# owner field while the job is running. Do this before sweeping
|
||||
# completed JSON files so a job that finishes during the rename saves
|
||||
# with the new owner or is caught by the disk sweep below.
|
||||
try:
|
||||
rh = getattr(request.app.state, "research_handler", None)
|
||||
rename_owner = getattr(rh, "rename_owner", None)
|
||||
if callable(rename_owner):
|
||||
rename_owner(old_username, new_username)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename active research tasks %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# deep_research: each completed report is a standalone JSON file with
|
||||
# an `owner` field. research_routes filters by d.get("owner") == user,
|
||||
# so a stale owner makes every report invisible to the renamed user.
|
||||
try:
|
||||
dr_dir = Path(DEEP_RESEARCH_DIR)
|
||||
if dr_dir.is_dir():
|
||||
for p in dr_dir.glob("*.json"):
|
||||
try:
|
||||
d = json.loads(p.read_text(encoding="utf-8"))
|
||||
if str(d.get("owner", "")).strip().lower() == old_username:
|
||||
d["owner"] = new_username
|
||||
atomic_write_json(str(p), d)
|
||||
except Exception as err:
|
||||
logger.warning("Failed to update research owner in %s: %s", p.name, err)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename research owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# memory.json: a flat JSON array where each entry carries an `owner`
|
||||
# field. memory_manager.load(owner=user) filters on it, so stale
|
||||
# entries disappear from the memory panel.
|
||||
try:
|
||||
if os.path.isfile(MEMORY_FILE):
|
||||
with open(MEMORY_FILE, encoding="utf-8") as fh:
|
||||
entries = json.loads(fh.read())
|
||||
if isinstance(entries, list):
|
||||
changed = False
|
||||
for entry in entries:
|
||||
if isinstance(entry, dict) and str(entry.get("owner", "")).strip().lower() == old_username:
|
||||
entry["owner"] = new_username
|
||||
changed = True
|
||||
if changed:
|
||||
atomic_write_json(MEMORY_FILE, entries)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename memory.json owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# uploads.json: upload rows use owner metadata for access checks and
|
||||
# owner-prefixed index keys for dedupe. Rename both so attachments keep
|
||||
# resolving after the account username changes.
|
||||
try:
|
||||
upload_handler = getattr(request.app.state, "upload_handler", None)
|
||||
rename_owner = getattr(upload_handler, "rename_owner", None)
|
||||
if callable(rename_owner):
|
||||
rename_owner(old_username, new_username)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename upload owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# skills: SKILL.md frontmatter carries owner: <username>; the usage
|
||||
# sidecar (_usage.json) keys entries as owner::skill-name. Both must
|
||||
# be updated or the renamed user's Skills panel goes empty.
|
||||
try:
|
||||
skills_root = Path(SKILLS_DIR)
|
||||
if skills_root.is_dir():
|
||||
_owner_re = re.compile(
|
||||
r'(?m)^(owner:\s*)' + re.escape(old_username) + r'\s*$',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
for p in skills_root.rglob("SKILL.md"):
|
||||
try:
|
||||
text = p.read_text(encoding="utf-8")
|
||||
new_text = _owner_re.sub(r'\g<1>' + new_username, text)
|
||||
if new_text != text:
|
||||
atomic_write_text(str(p), new_text)
|
||||
except Exception as err:
|
||||
logger.warning("Failed to update skill owner in %s: %s", p, err)
|
||||
usage_path = skills_root / "_usage.json"
|
||||
if usage_path.is_file():
|
||||
try:
|
||||
usage = json.loads(usage_path.read_text(encoding="utf-8"))
|
||||
if isinstance(usage, dict):
|
||||
new_usage = {}
|
||||
changed = False
|
||||
for k, v in usage.items():
|
||||
owner_part, sep, skill_part = k.partition("::")
|
||||
if sep and owner_part.lower() == old_username:
|
||||
new_usage[new_username + "::" + skill_part] = v
|
||||
changed = True
|
||||
else:
|
||||
new_usage[k] = v
|
||||
if changed:
|
||||
atomic_write_json(str(usage_path), new_usage)
|
||||
except Exception as err:
|
||||
logger.warning("Failed to update skills usage keys %s -> %s: %s", old_username, new_username, err)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to rename skills owner references %s -> %s: %s", old_username, new_username, e)
|
||||
|
||||
# The in-memory session cache (session_manager.sessions) stores each
|
||||
# session's owner at load time. Without this patch the renamed user's
|
||||
# sessions are invisible on the next /api/sessions call because
|
||||
# get_sessions_for_user does an exact `s.owner == username` comparison
|
||||
# against stale in-memory values.
|
||||
sm = getattr(request.app.state, "session_manager", None)
|
||||
if sm is not None:
|
||||
for sess in list(getattr(sm, "sessions", {}).values()):
|
||||
if str(getattr(sess, "owner", None) or "").strip().lower() == old_username:
|
||||
sess.owner = new_username
|
||||
|
||||
# The owner-rename loop above updated ApiToken.owner in the DB, but the
|
||||
# bearer-token cache still maps each token to the OLD owner. Without
|
||||
# refreshing it, the renamed user's API tokens resolve to the old (now
|
||||
@@ -348,6 +492,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
invalidator()
|
||||
return {"ok": True, "username": new_username, "renamed_self": old_username == user}
|
||||
|
||||
@router.put("/users/{username}/admin")
async def set_user_admin(username: str, body: SetAdminRequest, request: Request):
    """Grant or revoke admin rights for a user. Admin only.

    Demoting the last remaining admin is refused (no lockout). An admin may
    demote themselves as long as another admin exists; the `self` flag in
    the response tells the UI to reload the acting user into the
    normal-user view.
    """
    acting_user = _get_current_user(request)
    if not (acting_user and auth_manager.is_admin(acting_user)):
        raise HTTPException(403, "Admin only")

    outcome = auth_manager.set_admin(username, body.is_admin, acting_user)
    # Map each failure outcome to its HTTP error; anything else is success.
    failures = (
        (SetAdminResult.USER_NOT_FOUND, 404, "User not found"),
        (SetAdminResult.NOT_AUTHORIZED, 403, "Admin only"),
        (SetAdminResult.LAST_ADMIN, 400, "Cannot demote the last admin"),
    )
    for failure, status_code, detail in failures:
        if outcome is failure:
            raise HTTPException(status_code, detail)

    target = (username or "").strip().lower()
    acting = (acting_user or "").strip().lower()
    return {
        "ok": True,
        "is_admin": body.is_admin,
        "self": target == acting,
    }
|
||||
|
||||
@router.post("/signup-toggle", deprecated=True)
|
||||
async def toggle_signup(request: Request):
|
||||
"""
|
||||
@@ -378,7 +547,23 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
user = _get_current_user(request)
|
||||
if not user or not auth_manager.is_admin(user):
|
||||
raise HTTPException(403, "Admin only")
|
||||
ok = auth_manager.delete_user(body.username, user)
|
||||
|
||||
def _invalidate_api_token_cache():
    """Flag the bearer-token cache dirty, if the app registered an invalidator.

    Looks up ``invalidate_token_cache`` on ``request.app.state`` and calls it
    when present. Best effort: this never raises, so it is safe to call from
    error paths. A failure is logged rather than dropped silently, because a
    cache that was not invalidated may keep serving tokens that were already
    removed.
    """
    try:
        invalidator = getattr(request.app.state, "invalidate_token_cache", None)
        if invalidator:
            invalidator()
    except Exception:
        logger.warning("Failed to invalidate API token cache", exc_info=True)
|
||||
|
||||
try:
|
||||
ok = auth_manager.delete_user(body.username, user)
|
||||
except Exception:
|
||||
# delete_user can touch ApiToken rows before a later auth-store write
|
||||
# fails. Dirty the bearer cache anyway so a partial token purge does
|
||||
# not leave already-cached tokens authenticating until restart.
|
||||
_invalidate_api_token_cache()
|
||||
raise
|
||||
if not ok:
|
||||
raise HTTPException(400, "Cannot delete user")
|
||||
# delete_user removes the user's ApiToken rows, but the bearer-auth
|
||||
@@ -386,12 +571,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
# rebuilds when flagged dirty. Without this, a deleted user's already
|
||||
# cached token keeps authenticating until some other token op or a
|
||||
# restart clears the cache. Mirror what the token routes do.
|
||||
try:
|
||||
invalidator = getattr(request.app.state, "invalidate_token_cache", None)
|
||||
if invalidator:
|
||||
invalidator()
|
||||
except Exception:
|
||||
pass
|
||||
_invalidate_api_token_cache()
|
||||
return {"ok": True}
|
||||
|
||||
# ---- Feature visibility (admin-managed) ----
|
||||
|
||||
@@ -101,11 +101,17 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
|
||||
# ── Skills ──
|
||||
if "skills" in body and isinstance(body["skills"], list):
|
||||
existing = skills_manager.load_all()
|
||||
existing_names = {s.get("name") for s in existing if s.get("name")}
|
||||
existing_ids = {s.get("id") for s in existing if s.get("id")}
|
||||
# Dedup against THIS user's own skills only. Using every tenant's
|
||||
# rows (load_all) meant a skill whose id/name/title matched any
|
||||
# other user's was silently skipped, so the importing user lost
|
||||
# their own data — same cross-tenant bug fixed for memories above.
|
||||
# The full store is still saved back below.
|
||||
own = [s for s in existing if s.get("owner") == user]
|
||||
existing_names = {s.get("name") for s in own if s.get("name")}
|
||||
existing_ids = {s.get("id") for s in own if s.get("id")}
|
||||
existing_titles = {
|
||||
(s.get("title") or s.get("description") or "").strip().lower()
|
||||
for s in existing
|
||||
for s in own
|
||||
}
|
||||
added = 0
|
||||
for skill in body["skills"]:
|
||||
|
||||
+71
-53
@@ -11,7 +11,7 @@ from pydantic import BaseModel
|
||||
from sqlalchemy import or_, and_
|
||||
from dateutil.rrule import rrulestr
|
||||
|
||||
from core.database import SessionLocal, CalendarCal, CalendarEvent
|
||||
from core.database import SessionLocal, CalendarCal, CalendarDeletedEvent, CalendarEvent
|
||||
from src.auth_helpers import require_user
|
||||
from src.upload_limits import read_upload_limited, ICS_MAX_BYTES
|
||||
|
||||
@@ -126,6 +126,54 @@ def _resolve_base_uid(uid: str) -> str:
|
||||
raise ValueError("malformed compound UID: missing base before ::")
|
||||
return base
|
||||
|
||||
|
||||
async def _push_caldav_event_after_commit(owner: str, uid: str, action: str):
    """Best-effort CalDAV write-through for an already committed local change.

    ``action`` selects the push helper from ``src.caldav_sync``: ``"create"``,
    ``"update"`` or ``"delete"``. Any other value pushes nothing. Local
    writes stay authoritative if the remote server is unreachable: a failed
    push is logged, never raised. For create/update the event is re-marked
    with ``caldav_sync_pending`` so /sync can retry later.
    """
    try:
        result = {"ok": True}
        if action == "create":
            from src.caldav_sync import push_event_create
            result = await push_event_create(owner, uid)
        elif action == "update":
            from src.caldav_sync import push_event_update
            result = await push_event_update(owner, uid)
        elif action == "delete":
            from src.caldav_sync import push_event_delete
            result = await push_event_delete(owner, uid)
        # A result that is neither ok nor explicitly skipped counts as a
        # failure and takes the same path as a raised exception.
        if result and not result.get("ok") and not result.get("skipped"):
            raise RuntimeError(result.get("error") or result)
    except Exception as e:
        logger.warning("CalDAV %s push failed for uid=%s: %s", action, uid, e)
        if action in {"create", "update"}:
            db = SessionLocal()
            try:
                ev = _get_or_404_event(db, uid, owner)
                ev.caldav_sync_pending = action
                db.commit()
            except Exception as flag_err:
                # Without the pending flag nothing marks this event for a
                # retry, so make the loss visible instead of swallowing it.
                logger.warning(
                    "Could not mark uid=%s as pending CalDAV %s: %s",
                    uid, action, flag_err,
                )
                db.rollback()
            finally:
                db.close()
|
||||
|
||||
|
||||
def _record_caldav_delete_tombstone(db, ev: CalendarEvent, owner: str) -> None:
    """Create or refresh the delete tombstone for a CalDAV-backed event.

    Does nothing unless the event's calendar has source ``"caldav"``. The
    tombstone is looked up by (uid, owner), added to the session when
    missing, and then overwritten with the event's remote identifiers. The
    caller is responsible for committing.
    """
    calendar = ev.calendar
    if not calendar or calendar.source != "caldav":
        return

    tombstone = (
        db.query(CalendarDeletedEvent)
        .filter(
            CalendarDeletedEvent.uid == ev.uid,
            CalendarDeletedEvent.owner == owner,
        )
        .first()
    )
    if not tombstone:
        tombstone = CalendarDeletedEvent(uid=ev.uid, owner=owner)
        db.add(tombstone)

    # Snapshot the fields taken from the event and its calendar; clearing
    # last_error resets any error recorded on a reused tombstone.
    snapshot = {
        "calendar_id": ev.calendar_id,
        "remote_href": ev.remote_href,
        "remote_etag": ev.remote_etag,
        "caldav_base_url": getattr(calendar, "caldav_base_url", None),
        "summary": ev.summary or "",
        "last_error": None,
    }
    for field_name, value in snapshot.items():
        setattr(tombstone, field_name, value)
|
||||
|
||||
# ── Pydantic models ──
|
||||
|
||||
class EventCreate(BaseModel):
|
||||
@@ -843,36 +891,35 @@ def setup_calendar_routes() -> APIRouter:
|
||||
return {"ok": False, "error": str(e)[:200]}
|
||||
|
||||
@router.post("/sync")
|
||||
async def sync_caldav_endpoint(request: Request):
|
||||
"""Pull events from the configured CalDAV server into local DB.
|
||||
async def sync_caldav_endpoint(request: Request, direction: str = "pull"):
|
||||
"""Sync events with the configured CalDAV server.
|
||||
Returns counts + any per-calendar errors. Called by the frontend
|
||||
on calendar open and by the periodic scheduler loop."""
|
||||
owner = _require_user(request)
|
||||
from src.caldav_sync import sync_caldav
|
||||
return await sync_caldav(owner)
|
||||
from src.caldav_sync import sync_caldav_direction
|
||||
return await sync_caldav_direction(owner, direction)
|
||||
|
||||
|
||||
@router.delete("/calendars/{cal_id}")
|
||||
async def delete_calendar(cal_id: str, request: Request):
|
||||
async def delete_calendar(request: Request, cal_id: str):
|
||||
owner = _require_user(request)
|
||||
db = SessionLocal()
|
||||
try:
|
||||
cal = db.query(CalendarCal).filter(
|
||||
CalendarCal.id == cal_id,
|
||||
CalendarCal.owner == owner,
|
||||
).first()
|
||||
if not cal:
|
||||
raise HTTPException(404, "Calendar not found")
|
||||
cal = _get_or_404_calendar(db, cal_id, owner)
|
||||
db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
|
||||
db.delete(cal)
|
||||
db.commit()
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
logger.error("Failed to delete calendar %s: %s", cal_id, e)
|
||||
raise HTTPException(500, "Failed to delete calendar")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
@router.get("/calendars")
|
||||
async def list_calendars(request: Request):
|
||||
owner = _require_user(request)
|
||||
@@ -1003,19 +1050,12 @@ def setup_calendar_routes() -> APIRouter:
|
||||
is_utc=_is_utc and not data.all_day,
|
||||
rrule=data.rrule or "",
|
||||
color=data.color or None,
|
||||
caldav_sync_pending="create" if cal.source == "caldav" else None,
|
||||
)
|
||||
db.add(ev)
|
||||
db.commit()
|
||||
if cal.source == "caldav":
|
||||
# Push the new event to the remote so it appears on the user's
|
||||
# other devices — the sync is otherwise pull-only (#800).
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, cal.source, cal.id, {
|
||||
"uid": uid, "summary": data.summary, "description": data.description,
|
||||
"location": data.location, "dtstart": dtstart, "dtend": dtend,
|
||||
"all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
|
||||
"rrule": data.rrule or "",
|
||||
})
|
||||
await _push_caldav_event_after_commit(owner, uid, "create")
|
||||
return {"ok": True, "uid": uid}
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -1061,15 +1101,12 @@ def setup_calendar_routes() -> APIRouter:
|
||||
ev.rrule = data.rrule
|
||||
if data.color is not None:
|
||||
ev.color = data.color if data.color else None
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav"
|
||||
if is_caldav:
|
||||
ev.caldav_sync_pending = "update"
|
||||
db.commit()
|
||||
cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
|
||||
if cal and cal.source == "caldav":
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, cal.source, cal.id, {
|
||||
"uid": ev.uid, "summary": ev.summary, "description": ev.description,
|
||||
"location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
|
||||
"all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
|
||||
})
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "update")
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -1090,15 +1127,13 @@ def setup_calendar_routes() -> APIRouter:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ev = _get_or_404_event(db, base_uid, owner)
|
||||
# Capture what the remote push needs BEFORE the row is gone.
|
||||
_cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
|
||||
_is_caldav = bool(_cal and _cal.source == "caldav")
|
||||
_cal_id, _ev_uid = ev.calendar_id, ev.uid
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav"
|
||||
if is_caldav:
|
||||
_record_caldav_delete_tombstone(db, ev, owner)
|
||||
db.delete(ev)
|
||||
db.commit()
|
||||
if _is_caldav:
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "delete")
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -1152,23 +1187,6 @@ def setup_calendar_routes() -> APIRouter:
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@router.delete("/calendars/{cal_id}")
|
||||
async def delete_calendar(request: Request, cal_id: str):
|
||||
owner = _require_user(request)
|
||||
db = SessionLocal()
|
||||
try:
|
||||
cal = _get_or_404_calendar(db, cal_id, owner)
|
||||
db.query(CalendarEvent).filter(CalendarEvent.calendar_id == cal_id).delete()
|
||||
db.delete(cal)
|
||||
db.commit()
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
db.rollback()
|
||||
return {"error": str(e)}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Hard cap on ICS upload (ICS_MAX_BYTES, default 10 MB). Loading the whole
|
||||
# file into memory is unavoidable with python-icalendar, so an unbounded
|
||||
|
||||
+135
-10
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
|
||||
return
|
||||
|
||||
owner = getattr(sess, "owner", None)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=owner,
|
||||
)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
|
||||
if not t_model:
|
||||
# If no task/utility model is configured at all, fall back to
|
||||
# the session's own model so auto-naming still works even on
|
||||
# minimal setups.
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
_fallback = resolve_endpoint("default", owner=owner)
|
||||
if _fallback and _fallback[1]:
|
||||
t_url, t_model, t_headers = _fallback
|
||||
else:
|
||||
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
|
||||
if not t_model:
|
||||
logger.debug("[auto-name] No model provided, skipping")
|
||||
return
|
||||
@@ -497,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _session_is_research_spinoff(sess) -> bool:
    """Return True if this session was created via research "Discuss" spin-off.

    Detected by a system message in ``sess.history`` whose metadata carries a
    truthy ``research_spinoff_from`` entry (the primer the spin-off endpoint
    seeds). Such sessions are grounded on the seeded report, so callers
    suppress global memory and personal-doc RAG injection for them.

    History entries may be message objects (``.role`` / ``.metadata``) or
    plain dicts. A missing or empty history yields False. Metadata that is
    not mapping-like is ignored rather than raising, so one malformed
    message cannot break context building.
    """
    for m in getattr(sess, "history", []) or []:
        role = getattr(m, "role", None)
        if role is None and isinstance(m, dict):
            role = m.get("role")
        if role != "system":
            continue
        md = getattr(m, "metadata", None)
        if md is None and isinstance(m, dict):
            md = m.get("metadata")
        # Duck-type the lookup: anything without a callable .get (None, a
        # string, a list, ...) cannot carry the marker.
        lookup = getattr(md, "get", None)
        if callable(lookup) and lookup("research_spinoff_from"):
            return True
    return False
|
||||
|
||||
|
||||
async def build_chat_context(
|
||||
sess,
|
||||
request,
|
||||
@@ -562,9 +593,17 @@ async def build_chat_context(
|
||||
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
|
||||
)
|
||||
|
||||
# Research-spinoff ("Discuss") sessions are grounded on the seeded report:
|
||||
# the primer system message IS the knowledge base. Injecting global memory
|
||||
# or personal-doc RAG on every turn pulls in keyword-matched but off-topic
|
||||
# facts ("wrong data") and competes with the report, so suppress both here.
|
||||
is_research_spinoff = _session_is_research_spinoff(sess)
|
||||
if is_research_spinoff:
|
||||
mem_enabled = False
|
||||
|
||||
# Use RAG?
|
||||
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
|
||||
if incognito or not allow_tool_preprocessing:
|
||||
if incognito or not allow_tool_preprocessing or is_research_spinoff:
|
||||
use_rag_val = False
|
||||
|
||||
# If pre-fetched search context was provided (compare mode), skip live web search
|
||||
@@ -587,7 +626,7 @@ async def build_chat_context(
|
||||
incognito=incognito,
|
||||
use_skills=skills_enabled,
|
||||
)
|
||||
if use_rag is not None:
|
||||
if use_rag is not None or is_research_spinoff:
|
||||
_preface_kwargs["use_rag"] = use_rag_val
|
||||
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
|
||||
|
||||
@@ -615,6 +654,26 @@ async def build_chat_context(
|
||||
# Build messages
|
||||
messages = preface + sess.get_context_messages()
|
||||
|
||||
# Current date/time — injected as a standalone *user*-role context message
|
||||
# placed immediately before the latest user turn, NOT folded into the
|
||||
# system prompt. Its text changes every minute, and local OpenAI-compatible
|
||||
# backends (llama.cpp / LM Studio) key their KV-cache prefix off the
|
||||
# system message byte-for-byte; mixing ever-changing timestamp text into
|
||||
# it would invalidate the cached prefix on every request (issue #2927).
|
||||
# Placing it at the tail also keeps it out of the stable
|
||||
# preface+history prefix, so that prefix stays byte-identical turn over
|
||||
# turn (modulo the genuinely new history entries) and the cache survives.
|
||||
if not agent_mode:
|
||||
try:
|
||||
from src.user_time import current_datetime_context_message
|
||||
_dt_msg = current_datetime_context_message()
|
||||
if messages and messages[-1].get("role") == "user":
|
||||
messages.insert(len(messages) - 1, _dt_msg)
|
||||
else:
|
||||
messages.append(_dt_msg)
|
||||
except Exception:
|
||||
logger.debug("Failed to add current date/time context", exc_info=True)
|
||||
|
||||
# Auto-compact
|
||||
messages, context_length, was_compacted = await maybe_compact(
|
||||
sess, sess.endpoint_url, sess.model, messages, sess.headers, owner=user,
|
||||
@@ -911,6 +970,54 @@ def save_assistant_response(
|
||||
return None
|
||||
|
||||
|
||||
def _is_session_stream_active(session_id: str) -> bool:
    """Best-effort answer to "is a chat completion streaming for this session?".

    Used to keep background extraction from overlapping a main completion
    and competing for the local backend's processing slots (issue #2927).
    The route module's live registry is imported lazily to avoid a circular
    import (chat_routes imports this module at load time). Any failure,
    including the import itself, is reported as "not active".
    """
    try:
        from routes import chat_routes as route_module
        live_streams = getattr(route_module, "_active_streams", {})
        return session_id in live_streams
    except Exception:
        return False
|
||||
|
||||
|
||||
async def _run_extraction_jobs_sequentially(session_id: str, jobs: list, max_wait_s: float = 120.0):
    """Run queued background-extraction coroutines one at a time, only once
    no chat completion is actively streaming for this session.

    As diagnosed in issue #2927, firing memory/skill extraction concurrently
    with the main chat completion (or with each other) makes them compete for
    the local backend's limited processing slots, evicting the main
    conversation's cached KV-cache checkpoint and forcing a full prompt
    re-evaluation on the next turn. Waiting for the stream to go idle and then
    running the jobs strictly in sequence keeps at most one "side" request in
    flight against the backend at any time.

    ``jobs`` is a list of ``(name, awaitable)`` pairs. Each idle wait is
    capped at ``max_wait_s`` seconds, after which the job runs anyway. A job
    that raises is logged and does not stop the jobs after it. If this runner
    itself is cancelled or fails, jobs that never started are closed so they
    do not linger as never-awaited coroutines.
    """
    poll = 0.25

    async def _wait_until_stream_idle() -> None:
        # Poll the live-stream registry until the session is idle or the
        # per-wait budget is spent.
        waited = 0.0
        while _is_session_stream_active(session_id) and waited < max_wait_s:
            await asyncio.sleep(poll)
            waited += poll

    started = 0
    try:
        # Wait for the triggering turn's own stream to finish winding down
        # (a small safety margin, not the primary mechanism).
        await _wait_until_stream_idle()

        for name, job in jobs:
            # Re-check before each job: a fast follow-up message from the
            # user may have started a new stream while we waited.
            await _wait_until_stream_idle()
            started += 1
            try:
                await job
            except Exception:
                logger.warning("[bg-extract] %s extraction job failed for session %s", name, session_id, exc_info=True)
    finally:
        # Only non-empty when we exit early (cancellation or an unexpected
        # error); awaitables without a close() method are left alone.
        for _name, leftover in jobs[started:]:
            close = getattr(leftover, "close", None)
            if callable(close):
                close()
|
||||
|
||||
|
||||
def run_post_response_tasks(
|
||||
sess,
|
||||
session_manager,
|
||||
@@ -933,7 +1040,22 @@ def run_post_response_tasks(
|
||||
extract_skills: bool = True,
|
||||
allow_background_extraction: bool = True,
|
||||
):
|
||||
"""Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction."""
|
||||
"""Fire background tasks after a completed response: memory extraction, webhooks, auto-name, skill extraction.
|
||||
|
||||
Memory/skill extraction are queued to run *sequentially*, after the main
|
||||
completion stream for this session has fully wound down — never
|
||||
concurrently with it or with each other. As diagnosed in issue #2927,
|
||||
firing these "side" LLM calls in parallel with the main chat completion
|
||||
makes them compete for the local backend's limited processing slots
|
||||
(llama.cpp defaults to 4), evicting the main conversation's cached
|
||||
checkpoint and forcing a full prompt re-evaluation on the next turn. By
|
||||
the time this function runs the main response is already saved, but the
|
||||
extraction calls themselves are still async — queuing them through
|
||||
``_queue_background_extraction`` keeps them from overlapping the *next*
|
||||
turn's request too.
|
||||
"""
|
||||
_extraction_jobs: list = []
|
||||
|
||||
# Memory extraction — only every 4th message pair to avoid excess LLM calls
|
||||
_msg_count = len(sess.history) if hasattr(sess, 'history') else 0
|
||||
_should_extract = (_msg_count >= 4) and (_msg_count % 4 == 0)
|
||||
@@ -943,10 +1065,10 @@ def run_post_response_tasks(
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=owner,
|
||||
)
|
||||
asyncio.create_task(extract_and_store(
|
||||
_extraction_jobs.append(("memory", extract_and_store(
|
||||
sess, memory_manager, memory_vector,
|
||||
t_url, t_model, t_headers,
|
||||
))
|
||||
)))
|
||||
|
||||
# Skill extraction from complex agent runs. Only when the user actually
|
||||
# chose agent mode — not a chat we auto-escalated for a notes/calendar
|
||||
@@ -982,12 +1104,15 @@ def run_post_response_tasks(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=owner,
|
||||
)
|
||||
logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
|
||||
asyncio.create_task(maybe_extract_skill(
|
||||
_extraction_jobs.append(("skill", maybe_extract_skill(
|
||||
sess, skills_manager,
|
||||
s_url, s_model, s_headers,
|
||||
agent_rounds, agent_tool_calls,
|
||||
owner=owner,
|
||||
))
|
||||
)))
|
||||
|
||||
if _extraction_jobs:
|
||||
asyncio.create_task(_run_extraction_jobs_sequentially(session_id, _extraction_jobs))
|
||||
|
||||
# Token accumulation
|
||||
if last_metrics:
|
||||
|
||||
+58
-7
@@ -62,6 +62,33 @@ def _stream_set(session_id: str, **fields) -> None:
|
||||
rec.update(fields)
|
||||
|
||||
|
||||
def _resolve_request_workspace(request, raw_value) -> tuple:
    """Resolve the posted workspace for this request: (workspace, rejected).

    Returns ``("", "")`` when no usable workspace was submitted or the caller
    is not privileged; ``(workspace, "")`` when vetting succeeds; and
    ``("", requested)`` when a privileged caller's workspace was rejected.

    Privilege is checked BEFORE the path ever touches the filesystem. Only
    admin/single-user callers can use the workspace-backed file/shell tools,
    so only they get vet_workspace() and the workspace_rejected signal. For
    any other caller the submitted value is dropped uniformly, with no vetting
    and no event: otherwise the presence/absence of workspace_rejected would
    let a non-admin chat caller probe which host paths exist.

    On rejection there is no confinement and the default tool-path allowlist
    applies. The rejected value is surfaced so the stream can tell an admin
    client (which believes a workspace is active) that it was dropped.
    """
    # Form values are not guaranteed to be strings (presumably a file part
    # could arrive under this key; confirm against the form parser). Treat
    # anything else as "no workspace" instead of crashing on .strip().
    if not isinstance(raw_value, str):
        return "", ""
    requested = raw_value.strip()
    if not requested:
        return "", ""
    from src.tool_security import owner_is_admin_or_single_user
    if not owner_is_admin_or_single_user(get_current_user(request)):
        return "", ""
    from src.tool_execution import vet_workspace
    workspace = vet_workspace(requested) or ""
    return workspace, (requested if not workspace else "")
|
||||
|
||||
|
||||
def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
|
||||
if not session_url or not endpoint_base:
|
||||
return False
|
||||
@@ -400,6 +427,7 @@ def setup_chat_routes(
|
||||
temperature=ctx.preset.temperature,
|
||||
max_tokens=ctx.preset.max_tokens,
|
||||
prompt_type=preset_id,
|
||||
session_id=session,
|
||||
)
|
||||
_clean_reply, _clean_md = clean_thinking_for_save(reply, {"model": sess.model})
|
||||
sess.add_message(ChatMessage("assistant", _clean_reply, metadata=_clean_md))
|
||||
@@ -446,8 +474,11 @@ def setup_chat_routes(
|
||||
use_research = form_data.get("use_research")
|
||||
time_filter = form_data.get("time_filter")
|
||||
preset_id = form_data.get("preset_id")
|
||||
allow_bash = form_data.get("allow_bash")
|
||||
allow_web_search = form_data.get("allow_web_search")
|
||||
# Issue #3229: API callers send JSON, not FormData. Read from the
|
||||
# JSON body as fallback so callers who send {"allow_bash": true}
|
||||
# actually get bash enabled.
|
||||
allow_bash = form_data.get("allow_bash") or (body or {}).get("allow_bash")
|
||||
allow_web_search = form_data.get("allow_web_search") or (body or {}).get("allow_web_search")
|
||||
use_rag = form_data.get("use_rag")
|
||||
search_context = form_data.get("search_context") # pre-fetched web search results (compare mode)
|
||||
compare_mode = str(form_data.get("compare_mode", "")).lower() == "true"
|
||||
@@ -456,7 +487,10 @@ def setup_chat_routes(
|
||||
# manual form posts that still send plan_mode=true.
|
||||
plan_mode = False
|
||||
chat_mode = str(form_data.get("mode", "")).lower() # 'chat' or 'agent'
|
||||
workspace = ""
|
||||
# Workspace: confine the agent's file/shell tools to this folder.
|
||||
workspace, workspace_rejected = _resolve_request_workspace(
|
||||
request, form_data.get("workspace")
|
||||
)
|
||||
# Plan mode is a modifier on agent mode — it only makes sense with tools.
|
||||
if plan_mode:
|
||||
chat_mode = "agent"
|
||||
@@ -707,7 +741,7 @@ def setup_chat_routes(
|
||||
# leak a doc that belongs to a DIFFERENT session.
|
||||
if not active_doc:
|
||||
try:
|
||||
from src.tool_implementations import get_active_document
|
||||
from src.agent_tools.document_tools import get_active_document
|
||||
_mem_id = get_active_document()
|
||||
if _mem_id:
|
||||
_mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
|
||||
@@ -728,9 +762,18 @@ def setup_chat_routes(
|
||||
|
||||
# Build disabled-tools set from frontend toggles + user privileges
|
||||
disabled_tools = set()
|
||||
if str(allow_bash).lower() != "true":
|
||||
# Only disable bash/web_search when the caller *explicitly* set them
|
||||
# to a falsy value. When unset (None), defer to per-user privilege
|
||||
# checks below — this lets admins with can_use_bash=True use bash
|
||||
# by default without having to send allow_bash in every request.
|
||||
if allow_bash is not None and str(allow_bash).lower() != "true":
|
||||
disabled_tools.add("bash")
|
||||
if str(allow_web_search).lower() != "true":
|
||||
_explicit_web_intent = bool(_tool_intent and _tool_intent.category == "web")
|
||||
if (
|
||||
allow_web_search is not None
|
||||
and str(allow_web_search).lower() != "true"
|
||||
and not _explicit_web_intent
|
||||
):
|
||||
disabled_tools.add("web_search")
|
||||
disabled_tools.add("web_fetch")
|
||||
|
||||
@@ -848,6 +891,13 @@ def setup_chat_routes(
|
||||
# Register active stream for partial-save safety net
|
||||
_active_streams[session] = {"status": "streaming", "partial": "", "query": message, "is_research": effective_do_research, "mode": _effective_mode}
|
||||
|
||||
# The client sent a workspace the server refused to bind (deleted
|
||||
# folder, file path, sensitive dir, filesystem root). Tell it up
|
||||
# front so the UI can clear the pill instead of displaying a
|
||||
# confinement that is not actually in effect.
|
||||
if workspace_rejected:
|
||||
yield f"data: {json.dumps({'type': 'workspace_rejected', 'data': {'path': workspace_rejected}})}\n\n"
|
||||
|
||||
if ctx.preprocessed.attachment_meta:
|
||||
yield f"data: {json.dumps({'type': 'attachments', 'data': ctx.preprocessed.attachment_meta})}\n\n"
|
||||
|
||||
@@ -1076,6 +1126,7 @@ def setup_chat_routes(
|
||||
max_tokens=ctx.preset.max_tokens,
|
||||
prompt_type=preset_id,
|
||||
tools=None,
|
||||
session_id=session,
|
||||
):
|
||||
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
||||
try:
|
||||
@@ -1223,9 +1274,9 @@ def setup_chat_routes(
|
||||
tool_policy=tool_policy,
|
||||
owner=_user,
|
||||
fallbacks=_fallback_candidates,
|
||||
workspace=None,
|
||||
plan_mode=plan_mode,
|
||||
approved_plan=approved_plan or None,
|
||||
workspace=workspace or None,
|
||||
):
|
||||
if chunk.startswith("data: ") and not chunk.startswith("data: [DONE]"):
|
||||
try:
|
||||
|
||||
+18
-6
@@ -18,6 +18,7 @@ from fastapi.responses import StreamingResponse
|
||||
from src.auth_helpers import require_authenticated_request, require_user
|
||||
from src.tool_implementations import do_manage_notes
|
||||
from src.constants import COOKBOOK_STATE_FILE
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
|
||||
|
||||
COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
|
||||
@@ -36,6 +37,21 @@ DOCS_WRITE_SCOPES = {"documents:write"}
|
||||
WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"}
|
||||
|
||||
|
||||
def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
    """Resolve a cookbook task's stored SSH target into ``(host, port_flag)``.

    ``host`` is ``""`` for a local task. ``remoteHost`` / ``sshPort`` come from
    cookbook_state.json and get interpolated into an ``ssh`` command string, so
    validate them the same way the cookbook routes do. A tampered entry with
    shell metacharacters in ``remoteHost`` is rejected with 400 rather than
    injected.

    Args:
        task: One task record from the cookbook state.

    Returns:
        ``(host, port_flag)`` where ``port_flag`` is ``"-p <port> "`` (with a
        trailing space) for a non-default port and ``""`` otherwise.
    """
    # The state file is JSON, so a port may be stored as a number and a
    # tampered entry may hold any type. Coerce to text first so the value
    # reaches the validators instead of crashing on ``.strip()``.
    raw_host = str(task.get("remoteHost") or "").strip()
    raw_port = str(task.get("sshPort") or "").strip()

    host = validate_remote_host(raw_host or None) or ""
    ssh_port = validate_ssh_port(raw_port or None) or ""
    port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
    return host, port_flag
|
||||
|
||||
|
||||
async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
|
||||
"""Run an existing route handler with request.state.current_user temporarily
|
||||
set to ``owner`` so its internal get_current_user/require_user calls see
|
||||
@@ -550,8 +566,7 @@ def setup_codex_routes(
|
||||
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
|
||||
if task is None:
|
||||
raise HTTPException(404, "task not found")
|
||||
host = (task.get("remoteHost") or "").strip()
|
||||
ssh_port = (task.get("sshPort") or "").strip()
|
||||
host, port_flag = _ssh_prefix_for_task(task)
|
||||
# Prefer the persisted log file over the tmux pane. The pane gets
|
||||
# overwritten by the post-crash neofetch banner + bash prompt the
|
||||
# moment vllm exits; the log file is the raw stdout/stderr and
|
||||
@@ -563,7 +578,6 @@ def setup_codex_routes(
|
||||
f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
|
||||
)
|
||||
if host:
|
||||
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
|
||||
import shlex
|
||||
cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
|
||||
else:
|
||||
@@ -625,10 +639,8 @@ def setup_codex_routes(
|
||||
state = _read_cookbook_state()
|
||||
tasks = state.get("tasks") or []
|
||||
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
|
||||
host = ((task or {}).get("remoteHost") or "").strip()
|
||||
ssh_port = ((task or {}).get("sshPort") or "").strip()
|
||||
host, port_flag = _ssh_prefix_for_task(task or {})
|
||||
if host:
|
||||
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
|
||||
cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
|
||||
else:
|
||||
cmd = f"tmux kill-session -t {session_id}"
|
||||
|
||||
@@ -45,10 +45,14 @@ def _save_settings(settings):
|
||||
def _get_carddav_config():
    """Return the CardDAV connection settings as ``{"url", "username", "password"}``.

    Each value is read from the saved settings first and falls back to the
    matching ``CARDDAV_*`` environment variable when the key is absent. A
    password that comes from the saved settings is passed through
    ``decrypt()``; the environment fallback is used as-is.
    """
    import os

    settings = _load_settings()

    if "carddav_password" in settings:
        password = settings["carddav_password"]
        if password:
            # Only values persisted by the settings route are decrypted.
            from src.secret_storage import decrypt

            password = decrypt(password)
    else:
        password = os.environ.get("CARDDAV_PASSWORD", "")

    # A single "password" entry: a second one re-reading the raw stored value
    # would only be shadowed and invites returning the undecrypted secret.
    return {
        "url": settings.get("carddav_url", os.environ.get("CARDDAV_URL", "")),
        "username": settings.get("carddav_username", os.environ.get("CARDDAV_USERNAME", "")),
        "password": password,
    }
|
||||
|
||||
|
||||
@@ -769,8 +773,11 @@ def setup_contacts_routes():
|
||||
@router.post("/import")
|
||||
async def import_vcf(data: dict, _admin: str = Depends(require_admin)):
|
||||
"""Import contacts from .vcf or CSV. Body: {"vcf": "..."} or {"csv": "..."}."""
|
||||
text = data.get("vcf") or data.get("text") or ""
|
||||
csv_text = data.get("csv") or ""
|
||||
# Coerce defensively: a non-string vcf/text/csv (e.g. a number or list
|
||||
# in the JSON body) would otherwise reach .strip() and 500 with an
|
||||
# AttributeError instead of degrading to a clean "no data" response.
|
||||
text = str(data.get("vcf") or data.get("text") or "")
|
||||
csv_text = str(data.get("csv") or "")
|
||||
if text.strip():
|
||||
if "BEGIN:VCARD" not in text.upper():
|
||||
return {"success": False, "error": "No vCard data found"}
|
||||
@@ -822,7 +829,11 @@ def setup_contacts_routes():
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
else:
|
||||
settings[key] = data[key]
|
||||
value = data[key]
|
||||
if key == "carddav_password" and value:
|
||||
from src.secret_storage import encrypt
|
||||
value = encrypt(value)
|
||||
settings[key] = value
|
||||
_save_settings(settings)
|
||||
# Force re-fetch
|
||||
_contact_cache["fetched_at"] = None
|
||||
|
||||
+108
-30
@@ -1,16 +1,19 @@
|
||||
"""cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
|
||||
Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import ntpath
|
||||
import os
|
||||
import posixpath
|
||||
import re
|
||||
import shlex
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
from core.platform_compat import _ssh_exec_argv
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -30,21 +33,24 @@ _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
|
||||
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
|
||||
# Include pattern is a glob: allow typical safe glyphs only.
|
||||
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
|
||||
# Remote host: either `user@host` or plain `host` (alias is allowed), where host
|
||||
# is a safe DNS-like token or a short SSH config alias.
|
||||
_REMOTE_HOST_RE = re.compile(r"^(?:[A-Za-z0-9._-]+@)?[A-Za-z0-9._-]+$")
|
||||
# HF tokens and API tokens are url-safe base64-like.
|
||||
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
|
||||
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
|
||||
# Anything beyond plain alphanumerics + dash + underscore could break out
|
||||
# of the shell/PowerShell contexts the value lands in.
|
||||
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
|
||||
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
||||
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
|
||||
# A download target directory. Absolute or ~-relative path; safe path glyphs
|
||||
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
|
||||
# command. A leading ~ is expanded to $HOME at command-build time.
|
||||
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
|
||||
# only (no quotes or shell metacharacters). Spaces are allowed because command
|
||||
# builders pass the value through quoted shell/Python contexts. The character
|
||||
# class uses ``\w`` — Unicode word characters under Python 3's default str
|
||||
# matching — so non-ASCII folder names pass validation too: Cyrillic, accented
|
||||
# Latin, CJK, e.g. ``/Volumes/Модели`` or ``D:\AI Models\Модели``. This stays
|
||||
# shell-safe: none of ``; & | ` $ '' "" () {}`` newlines etc. are in ``[\w. -]``,
|
||||
# so injection vectors remain rejected. A leading ~ is expanded to $HOME at
|
||||
# command-build time. (Drive letters stay ASCII: ``[A-Za-z]:``.)
|
||||
_LOCAL_DIR_RE = re.compile(r"^~?(?:/[\w. -]*)+$|^~$")
|
||||
_WINDOWS_LOCAL_DIR_RE = re.compile(r"^[A-Za-z]:[\\/](?:[\w. -]+(?:[\\/][\w. -]+)*[\\/]?)?$")
|
||||
_WINDOWS_DRIVE_PATH_RE = re.compile(r"^[A-Za-z]:[\\/]")
|
||||
|
||||
|
||||
@@ -78,14 +84,6 @@ def _validate_include(v: str | None) -> str | None:
|
||||
return v
|
||||
|
||||
|
||||
def _validate_remote_host(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if not _REMOTE_HOST_RE.match(v):
|
||||
raise HTTPException(400, "Invalid remote_host — must be host or user@host, no SSH option syntax")
|
||||
return v
|
||||
|
||||
|
||||
def _validate_token(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
@@ -94,26 +92,43 @@ def _validate_token(v: str | None) -> str | None:
|
||||
return v
|
||||
|
||||
|
||||
def load_stored_hf_token(*, state_path: Path | str | None = None) -> str:
|
||||
"""Return the decrypted HF token from cookbook_state.json, else env fallback."""
|
||||
path = Path(state_path) if state_path else Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
|
||||
token = ""
|
||||
if path.exists():
|
||||
try:
|
||||
state = json.loads(path.read_text(encoding="utf-8"))
|
||||
env = state.get("env") if isinstance(state, dict) else {}
|
||||
if isinstance(env, dict) and env.get("hfToken"):
|
||||
from src.secret_storage import decrypt
|
||||
token = decrypt(env.get("hfToken") or "")
|
||||
except Exception:
|
||||
token = ""
|
||||
if not token:
|
||||
token = (os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") or "").strip()
|
||||
return token
|
||||
|
||||
|
||||
def _validate_local_dir(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if len(v) >= 2 and v[0] == v[-1] and v[0] in {"'", '"'}:
|
||||
v = v[1:-1]
|
||||
v = v.rstrip("/") or "/"
|
||||
if not _LOCAL_DIR_RE.match(v):
|
||||
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
|
||||
if not (_LOCAL_DIR_RE.match(v) or _WINDOWS_LOCAL_DIR_RE.match(v)):
|
||||
raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no shell metacharacters")
|
||||
# Reject path segments that start with '-' (option injection). '-' is in the
|
||||
# allowlist, so a dir like ``/models/-rf`` or ``D:\models\-rf`` could be read
|
||||
# as a CLI flag by hf/etc. — and quoting does NOT stop a value from being
|
||||
# parsed as an option. This is the one residual that command-build-time
|
||||
# quoting can't cover, so the guard lives here, keeping the safety wholly
|
||||
# inside the validator rather than relying on consumers.
|
||||
if any(seg.startswith("-") for seg in re.split(r"[\\/]", v) if seg):
|
||||
raise HTTPException(400, "Invalid local_dir — path segments cannot start with '-'")
|
||||
return v
|
||||
|
||||
|
||||
def _validate_ssh_port(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
if not _SSH_PORT_RE.fullmatch(str(v)):
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
port = int(v)
|
||||
if port < 1 or port > 65535:
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
return str(port)
|
||||
|
||||
|
||||
def _validate_gpus(v: str | None) -> str | None:
|
||||
if v is None or v == "":
|
||||
return None
|
||||
@@ -125,7 +140,7 @@ def _validate_gpus(v: str | None) -> str | None:
|
||||
def _shell_path(p: str) -> str:
|
||||
"""Render a validated path for a double-quoted shell context, expanding a
|
||||
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
|
||||
_validate_local_dir already restricts the charset."""
|
||||
_validate_local_dir already rejects quotes and shell metacharacters."""
|
||||
if p == "~":
|
||||
return '"$HOME"'
|
||||
if p.startswith("~/"):
|
||||
@@ -347,7 +362,12 @@ def _user_shell_path_bootstrap() -> list[str]:
|
||||
' ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"',
|
||||
' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
|
||||
'fi',
|
||||
'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
|
||||
# Windows can expose python3 as a Microsoft Store App Execution Alias
|
||||
# under WindowsApps. Git Bash sees that stub as present, but it exits
|
||||
# before running Python. A Windows venv usually has python.exe, not
|
||||
# python3.exe, so treat a missing or WindowsApps python3 as absent.
|
||||
'_odys_py3="$(command -v python3 2>/dev/null || true)"',
|
||||
'case "$_odys_py3" in ""|*[Ww]indows[Aa]pps*) python3() { python "$@"; } ;; esac',
|
||||
'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
|
||||
]
|
||||
|
||||
@@ -386,6 +406,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None, add_hf_cache:
|
||||
" for root, dirs, fns in safe_walk(base):",
|
||||
" for fn in sorted(fns):",
|
||||
" if not fn.lower().endswith('.gguf'): continue",
|
||||
" if fn.startswith('._'): continue # macOS AppleDouble sidecar, not a real GGUF",
|
||||
" fp = os.path.join(root, fn)",
|
||||
" try: size = os.path.getsize(fp)",
|
||||
" except Exception: size = 0",
|
||||
@@ -557,6 +578,36 @@ _GGUF_PRELUDE_RE = re.compile(
|
||||
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
|
||||
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
|
||||
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
|
||||
_LLAMA_CPP_PYTHON_GGML_TYPES = {
|
||||
"f32": "0",
|
||||
"f16": "1",
|
||||
"q4_0": "2",
|
||||
"q4_1": "3",
|
||||
"q5_0": "6",
|
||||
"q5_1": "7",
|
||||
"q8_0": "8",
|
||||
"q8_1": "9",
|
||||
"q2_k": "10",
|
||||
"q3_k": "11",
|
||||
"q4_k": "12",
|
||||
"q5_k": "13",
|
||||
"q6_k": "14",
|
||||
"q8_k": "15",
|
||||
"iq2_xxs": "16",
|
||||
"iq2_xs": "17",
|
||||
"iq3_xxs": "18",
|
||||
"iq1_s": "19",
|
||||
"iq4_nl": "20",
|
||||
"iq3_s": "21",
|
||||
"iq2_s": "22",
|
||||
"iq4_xs": "23",
|
||||
"mxfp4": "39",
|
||||
"nvfp4": "40",
|
||||
"q1_0": "41",
|
||||
}
|
||||
_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile(
|
||||
r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)"
|
||||
)
|
||||
|
||||
|
||||
def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
|
||||
@@ -588,6 +639,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -
|
||||
return f"[{host}]" if bracketed_host else host, port
|
||||
|
||||
|
||||
def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None:
|
||||
"""Map llama.cpp KV cache type names to llama-cpp-python's integer enum."""
|
||||
if not cmd or "llama_cpp.server" not in cmd:
|
||||
return cmd
|
||||
|
||||
def repl(match: re.Match[str]) -> str:
|
||||
value = match.group("value")
|
||||
mapped = _LLAMA_CPP_PYTHON_GGML_TYPES.get(value.lower())
|
||||
if not mapped:
|
||||
return match.group(0)
|
||||
quote = match.group("quote")
|
||||
return f"{match.group('flag')}{match.group('sep')}{quote}{mapped}{quote}"
|
||||
|
||||
return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(repl, cmd)
|
||||
|
||||
|
||||
def _check_serve_binary(seg: str) -> None:
|
||||
"""Validate that a single command segment starts with an allowlisted binary
|
||||
(after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
|
||||
@@ -726,6 +793,7 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
|
||||
runner_lines.append(' done')
|
||||
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
|
||||
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
|
||||
runner_lines.append(' mkdir -p ~/bin')
|
||||
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
|
||||
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
|
||||
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
|
||||
@@ -1030,6 +1098,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
|
||||
"vLLM is not installed or not in PATH on this server.",
|
||||
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
|
||||
),
|
||||
(
|
||||
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
|
||||
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
|
||||
r"Please ensure sgl_kernel is properly installed",
|
||||
"SGLang native dependencies are missing on this server.",
|
||||
[
|
||||
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
|
||||
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"sglang.*command not found|No module named sglang|SGLang is not installed",
|
||||
"SGLang is not installed or not in PATH on this server.",
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
"""Pure helpers for shaping cookbook task output for the status response.
|
||||
|
||||
Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
|
||||
unit-tested without standing up the whole app.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
_FETCHING_ZERO_FILES_RE = re.compile(r"Fetching\s+0\s+files", re.IGNORECASE)
|
||||
|
||||
# One-line Python programs for the dead-session download check, run as
# `python3 -c <PROBE> <repo_id> <cache_root>` (locally or over SSH).
# cache_root is the task's custom download dir, or '' for the default HF
# cache. It is passed explicitly because the download runner exports
# HF_HOME=<local_dir>, which puts that task's cache under <local_dir>/hub,
# and the probe process's own environment knows nothing about it.

# Shared opening: resolve the hub cache base and the repo's models--* dir `d`.
_HF_PROBE_LOCATE_REPO = (
    "import os,sys;"
    "repo=sys.argv[1];"
    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
)

# Shared step: set `inc` when any blob under `d` still ends in '.incomplete'.
_HF_PROBE_SCAN_BLOBS = (
    "blobs=os.path.join(d,'blobs');"
    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
)

# Exits 0 when a non-empty snapshot directory exists and no blob is incomplete.
HF_CACHE_COMPLETE_PROBE = "".join((
    _HF_PROBE_LOCATE_REPO,
    "snap=os.path.join(d,'snapshots');",
    "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));",
    "inc=False;",
    _HF_PROBE_SCAN_BLOBS,
    "sys.exit(0 if ok and not inc else 1)",
))

# Exits 0 when at least one blob is still incomplete.
HF_CACHE_INCOMPLETE_PROBE = "".join((
    _HF_PROBE_LOCATE_REPO,
    _HF_PROBE_SCAN_BLOBS,
    "sys.exit(0 if inc else 1)",
))
|
||||
|
||||
|
||||
def classify_dead_download(full_snapshot: str):
    """Derive a finished download's status from the markers in its log.

    The runner prints DOWNLOAD_OK only after exiting 0 (and DOWNLOAD_FAILED
    otherwise), so the markers remain trustworthy once the tmux pane is gone.

    Returns:
        ``(status, zero_files)``, or ``None`` when the snapshot is empty or
        carries no marker and the caller has to fall back to the cache probe.
        DOWNLOAD_OK takes precedence over DOWNLOAD_FAILED, except that a
        "Fetching 0 files" run is reported as ``("error", True)`` because
        nothing matched the include/quant pattern.
    """
    if not full_snapshot:
        return None

    if "DOWNLOAD_OK" not in full_snapshot:
        # No success marker: either an explicit failure or nothing to go on.
        return ("error", False) if "DOWNLOAD_FAILED" in full_snapshot else None

    fetched_nothing = _FETCHING_ZERO_FILES_RE.search(full_snapshot) is not None
    return ("error", True) if fetched_nothing else ("completed", False)
|
||||
|
||||
|
||||
def error_aware_output_tail(full_snapshot: str, status: str) -> str:
    """Return the last lines of a task log for the status response.

    A task whose status is ``"error"`` gets its final 50 lines so the
    "Copy last 50 lines" action carries the real error context (stack traces,
    build output). Every other status gets the final 12 lines, which keeps the
    payload small on the 10s polling interval. An empty snapshot yields ``""``.
    """
    if not full_snapshot:
        return ""

    keep = 12
    if status == "error":
        keep = 50

    lines = full_snapshot.splitlines()
    return "\n".join(lines[-keep:])
|
||||
+94
-73
@@ -15,9 +15,11 @@ from pathlib import Path
|
||||
from fastapi import APIRouter, HTTPException, Request, Depends
|
||||
|
||||
from src.auth_helpers import require_user
|
||||
from src.constants import COOKBOOK_STATE_FILE
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.middleware import require_admin
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
from core.platform_compat import (
|
||||
IS_WINDOWS,
|
||||
detached_popen_kwargs,
|
||||
@@ -28,18 +30,26 @@ from core.platform_compat import (
|
||||
which_tool,
|
||||
)
|
||||
from routes.shell_routes import TMUX_LOG_DIR
|
||||
from routes.cookbook_output import (
|
||||
error_aware_output_tail, classify_dead_download,
|
||||
HF_CACHE_COMPLETE_PROBE, HF_CACHE_INCOMPLETE_PROBE,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from routes.cookbook_helpers import (
|
||||
_SSH_PORT_RE, _REMOTE_HOST_RE, _SESSION_ID_RE,
|
||||
_validate_repo_id, _validate_serve_model_id, _validate_include, _validate_remote_host, _validate_token,
|
||||
_validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path,
|
||||
_SESSION_ID_RE, _validate_repo_id, _validate_serve_model_id, _validate_include, _validate_token,
|
||||
_validate_local_dir, _validate_gpus, _shell_path,
|
||||
_ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
|
||||
_safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
|
||||
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
|
||||
load_stored_hf_token,
|
||||
_append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
|
||||
_pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
||||
_diagnose_serve_output, run_ssh_command_async,
|
||||
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
|
||||
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
||||
_normalize_llama_cpp_python_cache_types,
|
||||
ModelDownloadRequest, ServeRequest,
|
||||
)
|
||||
|
||||
@@ -48,13 +58,13 @@ _HF_TOKEN_STATUS_SNIPPET = (
|
||||
'echo "[odysseus] HF token: applied"; '
|
||||
'else '
|
||||
'echo "[odysseus] HF token: NOT SET — gated/private models will be denied. '
|
||||
'Add one in Odysseus Settings -> Cookbook -> HuggingFace Token."; '
|
||||
'Add one in Odysseus Cookbook -> Settings -> HuggingFace Token."; '
|
||||
'fi'
|
||||
)
|
||||
|
||||
def setup_cookbook_routes() -> APIRouter:
|
||||
router = APIRouter(tags=["cookbook"])
|
||||
_cookbook_state_path = Path(os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
|
||||
_cookbook_state_path = Path(COOKBOOK_STATE_FILE)
|
||||
|
||||
def _mask_secret(value: str) -> str:
|
||||
if not value:
|
||||
@@ -164,6 +174,16 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"vLLM is not installed or not in PATH on this server.",
|
||||
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
|
||||
),
|
||||
(
|
||||
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
|
||||
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
|
||||
r"Please ensure sgl_kernel is properly installed",
|
||||
"SGLang native dependencies are missing on this server.",
|
||||
[
|
||||
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
|
||||
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"sglang.*command not found|No module named sglang|SGLang is not installed",
|
||||
"SGLang is not installed or not in PATH on this server.",
|
||||
@@ -232,14 +252,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
return state
|
||||
|
||||
def _load_stored_hf_token() -> str:
|
||||
if not _cookbook_state_path.exists():
|
||||
return ""
|
||||
try:
|
||||
state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
|
||||
env = state.get("env") if isinstance(state, dict) else {}
|
||||
return _decrypt_secret(env.get("hfToken") if isinstance(env, dict) else "")
|
||||
except Exception:
|
||||
return ""
|
||||
return load_stored_hf_token(state_path=_cookbook_state_path)
|
||||
|
||||
def _cookbook_ssh_dir() -> Path:
|
||||
# The Docker image keeps cookbook keys under /app/.ssh; that path only
|
||||
@@ -354,7 +367,11 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# all output to the log the poller reads. Paths handed to bash use
|
||||
# POSIX form + shell-quoting so drive paths / spaces survive.
|
||||
inner = TMUX_LOG_DIR / f"{session_id}_run.sh"
|
||||
inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8")
|
||||
pp = shlex.quote(pid_path.as_posix())
|
||||
inner.write_text(
|
||||
f"printf '%s\\n' \"$$\" > {pp}\n" + "\n".join(bash_lines) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
lp = shlex.quote(log_path.as_posix())
|
||||
ip = shlex.quote(inner.as_posix())
|
||||
script_path = TMUX_LOG_DIR / f"{session_id}.sh"
|
||||
@@ -406,8 +423,8 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
else:
|
||||
_validate_repo_id(req.repo_id)
|
||||
_validate_include(req.include)
|
||||
_validate_remote_host(req.remote_host)
|
||||
req.ssh_port = _validate_ssh_port(req.ssh_port)
|
||||
validate_remote_host(req.remote_host)
|
||||
req.ssh_port = validate_ssh_port(req.ssh_port)
|
||||
req.local_dir = _validate_local_dir(req.local_dir)
|
||||
req.hf_token = "" if is_ollama_download else (req.hf_token or _load_stored_hf_token())
|
||||
_validate_token(req.hf_token)
|
||||
@@ -738,9 +755,8 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# Validate shell-bound inputs, matching the sibling list_gpus endpoint —
|
||||
# `host`/`ssh_port` are interpolated into an ssh command below, so an
|
||||
# unvalidated value (e.g. "x'; rm -rf ~ #") would be command injection.
|
||||
host = _validate_remote_host(host)
|
||||
if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
host = validate_remote_host(host)
|
||||
ssh_port = validate_ssh_port(ssh_port)
|
||||
TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
model_dirs = []
|
||||
@@ -889,11 +905,16 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# listening" check without requiring ss/netstat/nmap.
|
||||
ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
|
||||
if ssh_port and str(ssh_port) != "22":
|
||||
if not _SSH_PORT_RE.match(str(ssh_port)):
|
||||
try:
|
||||
ssh_port = validate_ssh_port(ssh_port)
|
||||
except HTTPException:
|
||||
return None
|
||||
ssh_base.extend(["-p", str(ssh_port)])
|
||||
host_arg = remote
|
||||
if not _REMOTE_HOST_RE.match(host_arg):
|
||||
try:
|
||||
host_arg = validate_remote_host(remote)
|
||||
except HTTPException:
|
||||
return None
|
||||
if not host_arg:
|
||||
return None
|
||||
probe_ports = " ".join(str(start_port + i) for i in range(max_offset + 1))
|
||||
script = (
|
||||
@@ -1196,8 +1217,8 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"""
|
||||
require_admin(request)
|
||||
# Defence-in-depth: reject values that could break out of shell contexts.
|
||||
_validate_remote_host(req.remote_host)
|
||||
req.ssh_port = _validate_ssh_port(req.ssh_port)
|
||||
validate_remote_host(req.remote_host)
|
||||
req.ssh_port = validate_ssh_port(req.ssh_port)
|
||||
req.gpus = _validate_gpus(req.gpus)
|
||||
req.hf_token = req.hf_token or _load_stored_hf_token()
|
||||
_validate_token(req.hf_token)
|
||||
@@ -1208,6 +1229,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# many downstream `"engine" in req.cmd` membership checks can't hit
|
||||
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
|
||||
req.cmd = _validate_serve_cmd(req.cmd) or ""
|
||||
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
|
||||
req.cmd = _venv_safe_local_pip_install_cmd(
|
||||
req.cmd,
|
||||
local=not bool(req.remote_host),
|
||||
@@ -1637,12 +1659,11 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
async def server_setup(request: Request, req: SetupRequest):
|
||||
"""Install required dependencies on a remote server via SSH."""
|
||||
require_admin(request)
|
||||
host = _validate_remote_host(req.host)
|
||||
host = validate_remote_host(req.host)
|
||||
if not host:
|
||||
raise HTTPException(400, "host is required")
|
||||
port = req.ssh_port
|
||||
if port is not None and port != "" and not re.fullmatch(r"\d{1,5}", port):
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
port = validate_ssh_port(port)
|
||||
pf = f"-p {port} " if port and port != "22" else ""
|
||||
|
||||
# Detect platform: Windows first (echo %OS% → Windows_NT), then Termux, then Linux
|
||||
@@ -1886,9 +1907,8 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
`busy` is True when free_mb/total_mb < 0.5.
|
||||
"""
|
||||
require_admin(request)
|
||||
host = _validate_remote_host(host)
|
||||
if ssh_port is not None and ssh_port != "" and not _SSH_PORT_RE.fullmatch(ssh_port):
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
host = validate_remote_host(host)
|
||||
ssh_port = validate_ssh_port(ssh_port)
|
||||
gpu_query = "nvidia-smi --query-gpu=index,name,memory.free,memory.total,memory.used,utilization.gpu,uuid --format=csv,noheader,nounits"
|
||||
nvidia_error = None
|
||||
try:
|
||||
@@ -2045,9 +2065,8 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
sig = (req.signal or "TERM").upper()
|
||||
if sig not in ("TERM", "KILL", "INT"):
|
||||
raise HTTPException(400, "signal must be TERM, KILL, or INT")
|
||||
host = _validate_remote_host(req.host)
|
||||
if req.ssh_port and not _SSH_PORT_RE.fullmatch(req.ssh_port):
|
||||
raise HTTPException(400, "Invalid ssh_port")
|
||||
host = validate_remote_host(req.host)
|
||||
req.ssh_port = validate_ssh_port(req.ssh_port)
|
||||
kill_cmd = f"kill -{sig} {req.pid}"
|
||||
try:
|
||||
if host:
|
||||
@@ -2381,14 +2400,19 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
host = (srv.get("host") or "").strip()
|
||||
if not host:
|
||||
continue # local-only entry; the /proc scan handles it
|
||||
if not _REMOTE_HOST_RE.match(host):
|
||||
try:
|
||||
host = validate_remote_host(host)
|
||||
except HTTPException:
|
||||
continue
|
||||
sport = str(srv.get("port") or "").strip()
|
||||
ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
|
||||
if sport and sport != "22":
|
||||
if not _SSH_PORT_RE.match(sport):
|
||||
try:
|
||||
sport = validate_ssh_port(sport)
|
||||
except HTTPException:
|
||||
continue
|
||||
ssh_base.extend(["-p", sport])
|
||||
if sport != "22":
|
||||
ssh_base.extend(["-p", sport])
|
||||
|
||||
try:
|
||||
ls = subprocess.run(
|
||||
@@ -2802,30 +2826,20 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
def _cookbook_tasks_status_sync():
|
||||
import subprocess
|
||||
|
||||
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
|
||||
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
|
||||
"""Best-effort check for a completed HF cache entry.
|
||||
|
||||
tmux output can stop at a stale progress line if the pane/session
|
||||
disappears before Cookbook captures the final DOWNLOAD_OK marker.
|
||||
In that case, trust the cache shape: a snapshot directory with files
|
||||
and no *.incomplete blobs means HuggingFace finished materializing the
|
||||
model.
|
||||
model. cache_root is the task's custom download dir — the runner
|
||||
pointed HF_HOME there, so the cache lives under <cache_root>/hub,
|
||||
not wherever this probe's environment says.
|
||||
"""
|
||||
if not repo_id or "/" not in repo_id:
|
||||
return False
|
||||
py = (
|
||||
"import os,sys;"
|
||||
"repo=sys.argv[1];"
|
||||
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
|
||||
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
|
||||
"snap=os.path.join(d,'snapshots');"
|
||||
"ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
|
||||
"inc=False;"
|
||||
"blobs=os.path.join(d,'blobs');"
|
||||
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
|
||||
"sys.exit(0 if ok and not inc else 1)"
|
||||
)
|
||||
cmd = ["python3", "-c", py, repo_id]
|
||||
cmd = ["python3", "-c", HF_CACHE_COMPLETE_PROBE, repo_id, cache_root or ""]
|
||||
try:
|
||||
if remote_host:
|
||||
ssh_base = ["ssh"]
|
||||
@@ -2839,7 +2853,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
|
||||
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
|
||||
"""Best-effort check for resumable HF partial blobs.
|
||||
|
||||
A lost SSH/tmux session can leave a real download still incomplete.
|
||||
@@ -2848,16 +2862,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"""
|
||||
if not repo_id or "/" not in repo_id:
|
||||
return False
|
||||
py = (
|
||||
"import os,sys;"
|
||||
"repo=sys.argv[1];"
|
||||
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
|
||||
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
|
||||
"blobs=os.path.join(d,'blobs');"
|
||||
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
|
||||
"sys.exit(0 if inc else 1)"
|
||||
)
|
||||
cmd = ["python3", "-c", py, repo_id]
|
||||
cmd = ["python3", "-c", HF_CACHE_INCOMPLETE_PROBE, repo_id, cache_root or ""]
|
||||
try:
|
||||
if remote_host:
|
||||
ssh_base = ["ssh"]
|
||||
@@ -2929,12 +2934,18 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
if not _SESSION_ID_RE.match(session_id):
|
||||
logger.warning(f"Skipping task with unsafe session_id: {session_id!r}")
|
||||
continue
|
||||
if remote and not _REMOTE_HOST_RE.match(remote):
|
||||
logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
|
||||
continue
|
||||
if _tport and not _SSH_PORT_RE.match(str(_tport)):
|
||||
logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
|
||||
continue
|
||||
if remote:
|
||||
try:
|
||||
remote = validate_remote_host(remote)
|
||||
except HTTPException:
|
||||
logger.warning(f"Skipping task with unsafe remoteHost: {remote!r}")
|
||||
continue
|
||||
if _tport:
|
||||
try:
|
||||
_tport = validate_ssh_port(str(_tport))
|
||||
except HTTPException:
|
||||
logger.warning(f"Skipping task with unsafe sshPort: {_tport!r}")
|
||||
continue
|
||||
if task_platform == "windows" and remote:
|
||||
# Windows: check PID file + Get-Process, read log tail
|
||||
sd = "$env:TEMP\\odysseus-sessions"
|
||||
@@ -3047,6 +3058,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
|
||||
# when the PID is gone instead of blindly reporting "stopped".
|
||||
download_zero_files = False
|
||||
exit_code = None
|
||||
status = "unknown"
|
||||
download_has_ok = task_type == "download" and "DOWNLOAD_OK" in full_snapshot
|
||||
download_has_failed = task_type == "download" and "DOWNLOAD_FAILED" in full_snapshot
|
||||
@@ -3055,7 +3067,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
and (
|
||||
".incomplete" in full_snapshot
|
||||
or bool(re.search(r'model-\d+-of-\d+\.[A-Za-z0-9_.-]+:\s+(?:[0-9]|[1-8][0-9])%', full_snapshot))
|
||||
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""))
|
||||
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
|
||||
)
|
||||
)
|
||||
if is_alive or (local_win_task and full_snapshot):
|
||||
@@ -3096,11 +3108,19 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
else:
|
||||
status = "running"
|
||||
else:
|
||||
# Session is dead — check if it completed or crashed
|
||||
if (
|
||||
# Session is dead — check if it completed or crashed. The
|
||||
# runner markers in the retained output are conclusive
|
||||
# (DOWNLOAD_OK only prints after exit 0), so check them before
|
||||
# the cache probe, which can't see ollama pulls at all.
|
||||
marker = classify_dead_download(full_snapshot) if task_type == "download" else None
|
||||
if marker is not None:
|
||||
status, download_zero_files = marker
|
||||
if status == "completed" and not progress_text:
|
||||
progress_text = "Download complete"
|
||||
elif (
|
||||
task_type == "download"
|
||||
and not download_has_incomplete_evidence
|
||||
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""))
|
||||
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
|
||||
):
|
||||
status = "completed"
|
||||
if not progress_text:
|
||||
@@ -3120,7 +3140,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
status = "error"
|
||||
if download_zero_files:
|
||||
diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
|
||||
output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
|
||||
output_tail = error_aware_output_tail(full_snapshot, status)
|
||||
|
||||
results.append({
|
||||
"session_id": session_id,
|
||||
@@ -3131,6 +3151,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"phase": serve_phase,
|
||||
"diagnosis": diagnosis,
|
||||
"output_tail": output_tail,
|
||||
"exit_code": exit_code,
|
||||
"cmd": _payload.get("_cmd") or "",
|
||||
"tps": phase_info.get("tps"),
|
||||
"reqs": phase_info.get("reqs"),
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
"""Diagnostics routes — /api/db/stats, /api/rag/stats, /api/test/youtube, /api/test-research."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Form, Request
|
||||
|
||||
from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
|
||||
from core.constants import DEFAULT_HOST
|
||||
from core.constants import DEFAULT_HOST, DATA_DIR
|
||||
from core.middleware import require_admin
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -16,9 +17,42 @@ def setup_diagnostics_routes(
|
||||
rag_manager,
|
||||
rag_available: bool,
|
||||
research_handler,
|
||||
memory_vector=None,
|
||||
) -> APIRouter:
|
||||
router = APIRouter(tags=["diagnostics"])
|
||||
|
||||
@router.get("/api/diagnostics/services")
async def get_service_health(request: Request) -> Dict[str, Any]:
    """Report the health of the app's external services (admin only).

    Delegates to ``collect_service_health`` with the RAG manager and memory
    vector store handed to the route factory, and returns its report as-is.
    The report covers ChromaDB, SearXNG, email, ntfy, and provider endpoints;
    the probes are non-intrusive, so the endpoint is safe to poll.
    """
    require_admin(request)

    # Imported lazily, at request time rather than at module import.
    from src.service_health import collect_service_health

    report = await collect_service_health(rag_manager, memory_vector)
    return report
|
||||
|
||||
@router.get("/api/diagnostics/logs")
async def get_diagnostics_logs(request: Request, limit: int = 200) -> Dict[str, Any]:
    """Return the last ``limit`` lines of the application log (admin only).

    Args:
        request: Incoming request; must belong to an admin.
        limit: Number of trailing lines wanted. Clamped to the range 1..1000.

    Returns:
        ``{"status": "success", "logs": [...]}`` with line terminators
        stripped. ``logs`` is empty when the log file does not exist.

    Raises:
        HTTPException: 500 when the log file cannot be read.
    """
    from collections import deque

    require_admin(request)
    limit = max(1, min(limit, 1000))
    try:
        log_file = os.path.join(DATA_DIR, "logs", "app.log")
        # Stream the file through a bounded deque so only the last `limit`
        # lines are ever held in memory, instead of materialising every line
        # just to slice off the tail.
        with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
            tail_lines = deque(f, maxlen=limit)
    except FileNotFoundError:
        # No log written yet (or it was rotated away between requests):
        # report an empty tail rather than an error. Catching the exception
        # avoids the check-then-open race of a separate exists() test.
        return {"status": "success", "logs": []}
    except Exception as e:
        logger.error(f"Diagnostics logs retrieval error: {e}")
        raise HTTPException(500, f"Failed to retrieve logs: {str(e)}")

    return {
        "status": "success",
        "logs": [line.rstrip('\r\n') for line in tail_lines],
    }
|
||||
|
||||
@router.get("/api/db/stats")
|
||||
async def get_database_stats(request: Request) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
|
||||
@@ -108,10 +108,10 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
# to markdown for prose.
|
||||
language = req.language
|
||||
if not language:
|
||||
from src.tool_implementations import _looks_like_email_document, _sniff_doc_language
|
||||
from src.agent_tools.document_tools import _looks_like_email_document, _sniff_doc_language
|
||||
language = _sniff_doc_language(req.content)
|
||||
else:
|
||||
from src.tool_implementations import _looks_like_email_document
|
||||
from src.agent_tools.document_tools import _looks_like_email_document
|
||||
if _looks_like_email_document(req.content, req.title):
|
||||
language = "email"
|
||||
|
||||
@@ -643,7 +643,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
# in-memory active-doc pointer so the last-resort injection
|
||||
# path doesn't re-surface this doc in a later chat (#1160).
|
||||
try:
|
||||
from src.tool_implementations import clear_active_document
|
||||
from src.agent_tools.document_tools import clear_active_document
|
||||
clear_active_document(doc_id)
|
||||
except Exception:
|
||||
pass
|
||||
@@ -672,7 +672,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
|
||||
# Closed/deleted — drop the in-memory active-doc pointer so it isn't
|
||||
# re-injected into a later, unrelated chat (#1160).
|
||||
try:
|
||||
from src.tool_implementations import clear_active_document
|
||||
from src.agent_tools.document_tools import clear_active_document
|
||||
clear_active_document(doc_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
+60
-16
@@ -304,6 +304,7 @@ OWNER_SCOPED_EMAIL_CACHE_TABLES = {
|
||||
"email_ai_replies",
|
||||
"email_calendar_extractions",
|
||||
"email_urgency_alerts",
|
||||
"sender_signatures",
|
||||
}
|
||||
|
||||
|
||||
@@ -341,6 +342,55 @@ def _ensure_owner_scoped_email_cache_table(conn, table: str, create_sql: str, co
|
||||
_lg.getLogger(__name__).warning(f"{table} owner-migration skipped: {_mig_e}")
|
||||
|
||||
|
||||
def _ensure_sender_signatures_table(conn):
    """Create the owner-scoped ``sender_signatures`` table, migrating old data.

    The table is keyed on ``(from_address, owner)``. If an existing table
    lacks the ``owner`` column or has a different primary key, it is rebuilt:
    the old table is renamed aside, a fresh one is created, the columns the
    old table actually has are copied across (``owner`` defaults to ``''``
    when the old table has none), and the old table is dropped.

    The rebuild runs inside a SAVEPOINT. If any step fails, the database is
    rolled back to the pre-migration state, so the original rows stay in
    ``sender_signatures`` and the migration is retried on the next call.
    Without this, a failure after the rename would leave an empty new table
    and strand the data in ``sender_signatures__old``.

    Failures are logged as warnings and never raised, matching the other
    best-effort schema migrations in this module.

    Args:
        conn: An open ``sqlite3`` connection. The caller is responsible for
            the final ``commit()``.
    """
    create_sql = """
        CREATE TABLE IF NOT EXISTS sender_signatures (
            from_address TEXT,
            owner TEXT DEFAULT '',
            signature_text TEXT,
            sample_count INTEGER,
            last_built_at TEXT NOT NULL,
            model_used TEXT,
            source TEXT,
            PRIMARY KEY (from_address, owner)
        )
    """
    savepoint = "sender_signatures_owner_migration"
    in_savepoint = False
    conn.execute(create_sql)
    try:
        info = conn.execute("PRAGMA table_info(sender_signatures)").fetchall()
        cols = [r[1] for r in info]
        # table_info column 5 is the 1-based position within the primary key
        # (0 for non-key columns); sorting by it yields the key in order.
        pk_cols = [r[1] for r in sorted((r for r in info if r[5]), key=lambda r: r[5])]
        if "owner" in cols and pk_cols == ["from_address", "owner"]:
            return

        conn.execute(f"SAVEPOINT {savepoint}")
        in_savepoint = True

        conn.execute("ALTER TABLE sender_signatures RENAME TO sender_signatures__old")
        conn.execute(create_sql)
        old_cols = [
            r[1]
            for r in conn.execute("PRAGMA table_info(sender_signatures__old)").fetchall()
        ]
        # Copy only the columns the legacy table really has.
        copy_cols = [
            c for c in (
                "from_address",
                "signature_text",
                "sample_count",
                "last_built_at",
                "model_used",
                "source",
            )
            if c in old_cols
        ]
        source_owner = "COALESCE(owner, '')" if "owner" in old_cols else "''"
        target_list = ", ".join([*copy_cols, "owner"])
        select_list = ", ".join([*copy_cols, source_owner])
        conn.execute(
            f"INSERT OR IGNORE INTO sender_signatures ({target_list}) "
            f"SELECT {select_list} FROM sender_signatures__old"
        )
        conn.execute("DROP TABLE sender_signatures__old")

        conn.execute(f"RELEASE SAVEPOINT {savepoint}")
        in_savepoint = False
    except Exception as _mig_e:
        import logging as _lg
        _log = _lg.getLogger(__name__)
        if in_savepoint:
            # Undo the partial rebuild so the legacy table and its rows are
            # restored, then pop the savepoint off the transaction stack.
            try:
                conn.execute(f"ROLLBACK TO SAVEPOINT {savepoint}")
                conn.execute(f"RELEASE SAVEPOINT {savepoint}")
            except Exception as _rb_e:
                _log.warning(f"sender_signatures owner-migration rollback failed: {_rb_e}")
        _log.warning(f"sender_signatures owner-migration skipped: {_mig_e}")
|
||||
|
||||
|
||||
def attachment_extract_dir(folder: str, uid: str) -> Path:
|
||||
"""Containment-safe extraction directory for an attachment.
|
||||
|
||||
@@ -559,20 +609,10 @@ def _init_scheduled_db():
|
||||
conn.execute("ALTER TABLE email_boundaries ADD COLUMN turns_json TEXT")
|
||||
except Exception:
|
||||
pass
|
||||
# Per-sender signature cache. Populated by `learn_sender_signatures`
|
||||
# action: the LLM extracts the common trailing block across N emails
|
||||
# from each sender; the renderer folds it consistently for every
|
||||
# future email from that address.
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS sender_signatures (
|
||||
from_address TEXT PRIMARY KEY,
|
||||
signature_text TEXT,
|
||||
sample_count INTEGER,
|
||||
last_built_at TEXT NOT NULL,
|
||||
model_used TEXT,
|
||||
source TEXT
|
||||
)
|
||||
""")
|
||||
# Per-sender signature cache. Populated by `learn_sender_signatures`.
|
||||
# Message sender addresses are global, so signatures must be scoped to the
|
||||
# mailbox owner before `/read` returns them to the renderer.
|
||||
_ensure_sender_signatures_table(conn)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
@@ -762,10 +802,14 @@ def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int
|
||||
imaplib._MAXLINE = 50_000_000
|
||||
return conn
|
||||
|
||||
def _imap_connect(account_id: str | None = None, owner: str = ""):
|
||||
def _imap_connect(account_id: str | None = None, owner: str = "",
|
||||
timeout: int = _IMAP_TIMEOUT_SECONDS):
|
||||
# SECURITY: passing `owner` scopes the fallback config lookup so a brand
|
||||
# new user doesn't get connected against another user's default mailbox
|
||||
# when they have no account configured.
|
||||
#
|
||||
# `timeout` is overridable so short-lived callers (e.g. the service-health
|
||||
# probe) can impose a tighter budget than the default IMAP timeout.
|
||||
cfg = _get_email_config(account_id, owner=owner)
|
||||
# Connection mode:
|
||||
# STARTTLS on → plain + upgrade
|
||||
@@ -778,7 +822,7 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
|
||||
cfg["imap_host"],
|
||||
cfg["imap_port"],
|
||||
starttls=bool(cfg.get("imap_starttls")),
|
||||
timeout=_IMAP_TIMEOUT_SECONDS,
|
||||
timeout=timeout,
|
||||
)
|
||||
try:
|
||||
conn.login(cfg["imap_user"], cfg["imap_password"])
|
||||
|
||||
+59
-26
@@ -249,6 +249,41 @@ def _uid_from_fetch_meta(meta_b: bytes) -> str:
|
||||
return m.group(1).decode() if m else ""
|
||||
|
||||
|
||||
_FETCH_SEQ_RE = re.compile(rb"^(\d+)\s+\(")


def _group_uid_fetch_records(msg_data) -> list:
    """Collapse an imaplib UID FETCH response into one record per message.

    imaplib hands back a flat, interleaved list. Attributes that carry a
    literal (``RFC822.HEADER {n}`` and friends) arrive as ``(meta, literal)``
    tuples; everything the server sends outside a literal arrives as a bare
    ``bytes`` element. Which attributes end up where depends on the server:
    Dovecot emits FLAGS ahead of the header literal, so it sits inside the
    tuple's meta, whereas Gmail emits FLAGS afterwards, so it shows up as a
    bare element such as ``b' FLAGS (\\Seen))'``. Discarding bare elements
    would lose FLAGS on Gmail and make every message look unread/unflagged.

    Grouping rule: a tuple whose meta begins with a sequence number starts a
    new record. Any other part (a continuation tuple or bare bytes) is
    appended, space-separated, to the meta of the record currently being
    built, so attribute regexes can scan the complete meta text. A
    continuation tuple also supplies the payload when the record has none
    yet. Closing ``b')'`` elements are appended as well, which does no harm.
    Parts seen before the first record are ignored.

    Returns a list of ``(meta_bytes, payload_bytes_or_None)`` tuples.
    """
    records: list = []
    for item in msg_data or []:
        if isinstance(item, tuple):
            head = item[0]
            if not isinstance(head, (bytes, bytearray)):
                head = str(head).encode()
            if _FETCH_SEQ_RE.match(head):
                # Sequence number up front: this tuple opens a new message.
                records.append((head, item[1]))
                continue
            if not records:
                continue
            meta, literal = records[-1]
            records[-1] = (meta + b" " + head, literal or item[1])
        elif records and isinstance(item, (bytes, bytearray)):
            meta, literal = records[-1]
            records[-1] = (meta + b" " + bytes(item), literal)
    return records
|
||||
|
||||
|
||||
def _smtp_ready(cfg: dict) -> bool:
|
||||
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
|
||||
|
||||
@@ -799,20 +834,11 @@ def setup_email_routes():
|
||||
except Exception as e:
|
||||
logger.warning(f"Batch fetch failed, falling back to per-UID: {e}")
|
||||
status, msg_data = "NO", []
|
||||
# imaplib batch responses interleave (meta, payload) tuples and
|
||||
# `b')'` terminators. Group by message: each tuple where the
|
||||
# meta begins with a seq number starts a new message record.
|
||||
seq_re = re.compile(rb'^(\d+)\s+\(')
|
||||
grouped = [] # list of (meta_str, payload_bytes)
|
||||
for part in (msg_data or []):
|
||||
if isinstance(part, tuple):
|
||||
meta_b = part[0] if isinstance(part[0], (bytes, bytearray)) else str(part[0]).encode()
|
||||
if seq_re.match(meta_b):
|
||||
grouped.append((meta_b, part[1]))
|
||||
elif grouped:
|
||||
# continuation of previous message — concatenate meta info if any
|
||||
cur_meta, cur_payload = grouped[-1]
|
||||
grouped[-1] = (cur_meta + b" " + meta_b, cur_payload or part[1])
|
||||
# Group the batched response into per-message (meta, payload)
|
||||
# records. Bare bytes parts must be kept: Gmail returns FLAGS
|
||||
# after the header literal as a bare element, and dropping it
|
||||
# rendered every Gmail message as unread/unflagged.
|
||||
grouped = _group_uid_fetch_records(msg_data)
|
||||
|
||||
if status != "OK" and not grouped:
|
||||
conn.logout()
|
||||
@@ -1061,7 +1087,10 @@ def setup_email_routes():
|
||||
return {"contacts": [], "error": "Mail operation failed"}
|
||||
|
||||
@router.get("/search")
|
||||
async def search_emails(
|
||||
# Sync def: the body is blocking IMAP I/O with no awaits. As `async def` it ran
|
||||
# directly on the event loop and stalled the whole app during a search; as a sync
|
||||
# def FastAPI runs it in a threadpool, keeping the loop responsive.
|
||||
def search_emails(
|
||||
q: str = Query(""),
|
||||
folder: str = Query("INBOX"),
|
||||
limit: int = Query(50),
|
||||
@@ -1123,14 +1152,15 @@ def setup_email_routes():
|
||||
continue
|
||||
raw_header = None
|
||||
flags = ""
|
||||
for part in msg_data:
|
||||
if isinstance(part, tuple):
|
||||
meta = part[0].decode() if isinstance(part[0], bytes) else str(part[0])
|
||||
if b"RFC822.HEADER" in part[0] if isinstance(part[0], bytes) else "RFC822.HEADER" in meta:
|
||||
raw_header = part[1]
|
||||
flag_match = re.search(r'FLAGS \(([^)]*)\)', meta)
|
||||
if flag_match:
|
||||
flags = flag_match.group(1)
|
||||
# Same Gmail caveat as the list route: FLAGS may
|
||||
# arrive after the header literal, so group bare
|
||||
# parts back into the message meta before scanning.
|
||||
for meta_b, payload in _group_uid_fetch_records(msg_data):
|
||||
if payload and b"RFC822.HEADER" in meta_b:
|
||||
raw_header = payload
|
||||
flag_match = re.search(rb'FLAGS \(([^)]*)\)', meta_b)
|
||||
if flag_match:
|
||||
flags = flag_match.group(1).decode(errors="replace")
|
||||
if not raw_header:
|
||||
continue
|
||||
msg = email_mod.message_from_bytes(raw_header)
|
||||
@@ -1279,8 +1309,9 @@ def setup_email_routes():
|
||||
try:
|
||||
if sender_addr:
|
||||
_rs = _c.execute(
|
||||
"SELECT signature_text FROM sender_signatures WHERE from_address = ?",
|
||||
(sender_addr.lower().strip(),),
|
||||
f"SELECT signature_text FROM sender_signatures "
|
||||
f"WHERE from_address = ? AND {owner_clause}",
|
||||
(sender_addr.lower().strip(), *owner_params),
|
||||
).fetchone()
|
||||
if _rs and _rs[0]:
|
||||
cached_sender_sig = _rs[0]
|
||||
@@ -1756,7 +1787,9 @@ def setup_email_routes():
|
||||
return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.post("/archive/{uid}")
|
||||
async def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
|
||||
# Sync def: blocking IMAP I/O with no awaits — see search_emails above. Runs in a
|
||||
# threadpool instead of blocking the event loop.
|
||||
def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
|
||||
"""Move email to Archive folder."""
|
||||
try:
|
||||
with _imap(account_id, owner=owner) as conn:
|
||||
|
||||
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.database import GalleryImage
|
||||
from src.auth_helpers import _auth_disabled
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -120,19 +121,18 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any
|
||||
}
|
||||
|
||||
|
||||
def _owner_filter(q, user):
|
||||
def _owner_filter(q, user, model_cls=GalleryImage):
|
||||
"""Apply owner filtering to a gallery query.
|
||||
|
||||
When auth is disabled (single-user mode) get_current_user returns None
|
||||
and there is no per-user scoping. The main library list and stats already
|
||||
treat None as "show everything" (`if user is not None`), so this helper
|
||||
must too — otherwise the tag/model filter sidebars come back empty and the
|
||||
tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
|
||||
silently affect zero rows in the most common self-hosted deployment.
|
||||
``get_current_user`` returns None both in auth-disabled single-user mode
|
||||
and when auth is enabled but no current user was resolved. Preserve the
|
||||
single-user behavior, but fail closed for auth-enabled null-user states.
|
||||
"""
|
||||
if user is None:
|
||||
if user is not None:
|
||||
return q.filter(model_cls.owner == user)
|
||||
if _auth_disabled():
|
||||
return q
|
||||
return q.filter(GalleryImage.owner == user)
|
||||
return q.filter(False)
|
||||
|
||||
|
||||
|
||||
|
||||
+58
-30
@@ -19,6 +19,7 @@ from src.upload_limits import (
|
||||
GALLERY_TRANSFORM_UPLOAD_MAX_BYTES,
|
||||
)
|
||||
from src.constants import GENERATED_IMAGES_DIR
|
||||
from src.optional_deps import patch_realesrgan_torchvision_compat
|
||||
|
||||
from routes.gallery_helpers import (
|
||||
GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
|
||||
@@ -108,6 +109,32 @@ def _visible_image_endpoint_for_base(db, base: str, owner: str | None):
|
||||
return fallback
|
||||
|
||||
|
||||
async def _fetch_result_image_b64(url: str) -> Optional[str]:
    """Download an upstream-supplied image URL and return it base64-encoded.

    ``url`` is taken from the body of the diffusion/OpenAI server's response
    rather than from our own configuration. A hostile or compromised endpoint
    could therefore point it at an internal or cloud-metadata address, so the
    URL is vetted with ``check_outbound_url`` before any request is made,
    the same check applied to the client-supplied endpoint. Private-range
    blocking is enabled only when ``IMAGE_BLOCK_PRIVATE_IPS`` is ``"true"``.

    Returns:
        The image bytes as a base64 string, or ``None`` when the server
        answers with anything other than HTTP 200.

    Raises:
        HTTPException: 502 when the URL fails the outbound-safety check.
    """
    import base64
    import httpx
    from src.url_safety import check_outbound_url

    block_private = os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true"
    allowed, reason = check_outbound_url(url, block_private=block_private)
    if not allowed:
        raise HTTPException(502, f"Upstream returned an unsafe image URL: {reason}")

    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.get(url)
        if response.status_code != 200:
            return None
        return base64.b64encode(response.content).decode()
|
||||
|
||||
|
||||
def setup_gallery_routes() -> APIRouter:
|
||||
router = APIRouter(tags=["gallery"])
|
||||
|
||||
@@ -476,8 +503,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
.outerjoin(DbSession, GalleryImage.session_id == DbSession.id)
|
||||
.filter(GalleryImage.is_active == True)
|
||||
)
|
||||
if user is not None:
|
||||
q = q.filter(GalleryImage.owner == user)
|
||||
q = _owner_filter(q, user)
|
||||
|
||||
# Search filter (prompt + tags + ai_tags)
|
||||
if search:
|
||||
@@ -579,28 +605,26 @@ def setup_gallery_routes() -> APIRouter:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
q = db.query(GalleryAlbum)
|
||||
if user:
|
||||
q = q.filter(GalleryAlbum.owner == user)
|
||||
q = _owner_filter(q, user, GalleryAlbum)
|
||||
albums = q.order_by(GalleryAlbum.created_at.desc()).all()
|
||||
result = []
|
||||
for a in albums:
|
||||
_count_q = db.query(GalleryImage).filter(
|
||||
GalleryImage.album_id == a.id, GalleryImage.is_active == True
|
||||
)
|
||||
if user:
|
||||
_count_q = _count_q.filter(GalleryImage.owner == user)
|
||||
_count_q = _owner_filter(_count_q, user)
|
||||
count = _count_q.count()
|
||||
cover_url = None
|
||||
if a.cover_id:
|
||||
cover = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id).first()
|
||||
cover_q = db.query(GalleryImage).filter(GalleryImage.id == a.cover_id)
|
||||
cover = _owner_filter(cover_q, user).first()
|
||||
if cover:
|
||||
cover_url = f"/api/generated-image/{cover.filename}"
|
||||
elif count > 0:
|
||||
_cover_q = db.query(GalleryImage).filter(
|
||||
GalleryImage.album_id == a.id, GalleryImage.is_active == True
|
||||
)
|
||||
if user:
|
||||
_cover_q = _cover_q.filter(GalleryImage.owner == user)
|
||||
_cover_q = _owner_filter(_cover_q, user)
|
||||
first = _cover_q.order_by(GalleryImage.created_at.desc()).first()
|
||||
if first:
|
||||
cover_url = f"/api/generated-image/{first.filename}"
|
||||
@@ -643,10 +667,9 @@ def setup_gallery_routes() -> APIRouter:
|
||||
base = db.query(GalleryImage).filter(GalleryImage.is_active == True)
|
||||
size_q = db.query(func.sum(GalleryImage.file_size)).filter(GalleryImage.is_active == True)
|
||||
album_q = db.query(GalleryAlbum)
|
||||
if user:
|
||||
base = base.filter(GalleryImage.owner == user)
|
||||
size_q = size_q.filter(GalleryImage.owner == user)
|
||||
album_q = album_q.filter(GalleryAlbum.owner == user)
|
||||
base = _owner_filter(base, user)
|
||||
size_q = _owner_filter(size_q, user)
|
||||
album_q = _owner_filter(album_q, user, GalleryAlbum)
|
||||
total = base.count()
|
||||
total_size = size_q.scalar() or 0
|
||||
fav_count = base.filter(GalleryImage.favorite == True).count()
|
||||
@@ -674,8 +697,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
GalleryImage.is_active == True,
|
||||
(GalleryImage.ai_tags == None) | (GalleryImage.ai_tags == ""),
|
||||
)
|
||||
if user:
|
||||
q = q.filter(GalleryImage.owner == user)
|
||||
q = _owner_filter(q, user)
|
||||
if album_id:
|
||||
q = q.filter(GalleryImage.album_id == album_id)
|
||||
untagged = q.count()
|
||||
@@ -909,15 +931,23 @@ def setup_gallery_routes() -> APIRouter:
|
||||
raise HTTPException(404, "Image not found")
|
||||
|
||||
img_filename = img.filename
|
||||
# Remove the file from disk
|
||||
img_path = _gallery_image_path(img_filename)
|
||||
if img_path.exists():
|
||||
img_path.unlink()
|
||||
|
||||
# Soft-delete the record
|
||||
# Soft-delete the record first; the DB is the source of truth.
|
||||
img.is_active = False
|
||||
db.commit()
|
||||
|
||||
# Only after the soft-delete commit succeeds do we remove the file.
|
||||
# If the file were deleted first and the commit then failed/rolled
|
||||
# back, the still-active record would point at a missing file.
|
||||
# Best-effort so a missing or locked file can't 500 a delete that
|
||||
# already succeeded logically. Uses the path-confined resolver so a
|
||||
# malformed stored filename can't escape generated_images.
|
||||
try:
|
||||
img_path = _gallery_image_path(img_filename)
|
||||
if img_path.exists():
|
||||
img_path.unlink()
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not remove gallery image file for {img_filename}: {e}")
|
||||
|
||||
# Strip stale chat-history references so the image bubble
|
||||
# (and its prompt caption) doesn't come back after a server
|
||||
# reboot replays the session. We remove the matching tool
|
||||
@@ -1147,10 +1177,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
if item.get("b64_json"):
|
||||
raw_b64 = item["b64_json"]
|
||||
elif item.get("url"):
|
||||
async with httpx.AsyncClient(timeout=60) as c2:
|
||||
img_r = await c2.get(item["url"])
|
||||
if img_r.status_code == 200:
|
||||
raw_b64 = base64.b64encode(img_r.content).decode()
|
||||
raw_b64 = await _fetch_result_image_b64(item["url"])
|
||||
if not raw_b64:
|
||||
raise HTTPException(502, "OpenAI returned no image")
|
||||
|
||||
@@ -1211,7 +1238,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
original and regenerates `strength` fraction. With strength ~0.4
|
||||
you get edge blending + lighting unification while keeping the
|
||||
composition recognisable."""
|
||||
import httpx, base64 as _b64
|
||||
import httpx
|
||||
user = require_privilege(request, "can_generate_images")
|
||||
body = await request.json()
|
||||
|
||||
@@ -1387,10 +1414,9 @@ def setup_gallery_routes() -> APIRouter:
|
||||
if item.get("b64_json"):
|
||||
return {"image": item["b64_json"]}
|
||||
if item.get("url"):
|
||||
async with httpx.AsyncClient(timeout=60) as c2:
|
||||
ir = await c2.get(item["url"])
|
||||
if ir.status_code == 200:
|
||||
return {"image": _b64.b64encode(ir.content).decode()}
|
||||
img_b64 = await _fetch_result_image_b64(item["url"])
|
||||
if img_b64:
|
||||
return {"image": img_b64}
|
||||
last_err = f"{path}: server returned no image"
|
||||
except httpx.ConnectError as e:
|
||||
raise HTTPException(502, f"Can't reach diffusion server at {base}: {e}")
|
||||
@@ -1450,6 +1476,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
img_bytes = base64.b64decode(image_b64)
|
||||
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
||||
try:
|
||||
patch_realesrgan_torchvision_compat()
|
||||
from realesrgan import RealESRGANer
|
||||
except ImportError:
|
||||
return {"error": "realesrgan not installed. Install it from Cookbook → Dependencies (search 'realesrgan')."}
|
||||
@@ -1499,6 +1526,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
img_bytes = base64.b64decode(image_b64)
|
||||
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
||||
try:
|
||||
patch_realesrgan_torchvision_compat()
|
||||
from basicsr.archs.rrdbnet_arch import RRDBNet
|
||||
from realesrgan import RealESRGANer
|
||||
except ImportError:
|
||||
|
||||
+23
-3
@@ -1,7 +1,9 @@
|
||||
import re
|
||||
from copy import deepcopy
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
|
||||
|
||||
# Backends the manual hardware simulator accepts. Must stay a subset of what
|
||||
@@ -11,6 +13,14 @@ from fastapi import APIRouter
|
||||
_MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
|
||||
|
||||
|
||||
def _validate_detection_target(host: str = "", ssh_port: str = "") -> tuple[str, str]:
    """Validate the optional remote detection target and return it as a pair.

    ``host`` is passed through ``validate_remote_host`` and ``ssh_port``
    through ``validate_ssh_port``; a falsy result from either validator is
    returned as the empty string.

    Returns:
        A ``(host, ssh_port)`` tuple of the validated values.

    Raises:
        HTTPException: 400 when a port is supplied without a host.
    """
    checked_host = validate_remote_host(host)
    checked_port = validate_ssh_port(ssh_port)
    target_host = checked_host if checked_host else ""
    target_port = checked_port if checked_port else ""
    # A port on its own is meaningless: there is no host to connect to.
    if target_port and not target_host:
        raise HTTPException(400, "ssh_port requires host")
    return target_host, target_port
|
||||
|
||||
|
||||
def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
|
||||
"""Manual hardware is a "what if I had this setup" simulator —
|
||||
REPLACES the detected hardware entirely instead of adding to it.
|
||||
@@ -105,6 +115,7 @@ def setup_hwfit_routes():
|
||||
"""Detect and return current system hardware info. Pass host=user@server for remote.
|
||||
fresh=true bypasses the per-host cache (the Rescan button)."""
|
||||
from services.hwfit.hardware import detect_system
|
||||
host, ssh_port = _validate_detection_target(host, ssh_port)
|
||||
return detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
|
||||
|
||||
@router.get("/models")
|
||||
@@ -118,6 +129,7 @@ def setup_hwfit_routes():
|
||||
from services.hwfit.hardware import detect_system
|
||||
from services.hwfit.fit import rank_models
|
||||
from services.hwfit.models import get_models, model_catalog_path
|
||||
host, ssh_port = _validate_detection_target(host, ssh_port)
|
||||
system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
|
||||
if system.get("error"):
|
||||
return {"system": system, "models": [], "error": system["error"]}
|
||||
@@ -165,8 +177,14 @@ def setup_hwfit_routes():
|
||||
system["gpu_name"] = g["name"]
|
||||
system["active_group"] = {**g, "use_count": n}
|
||||
|
||||
if gpu_count != "":
|
||||
n = int(gpu_count)
|
||||
# Parse the optional count defensively (matches the gpu_group guard
|
||||
# above): a non-numeric query param previously raised ValueError ->
|
||||
# HTTP 500. A malformed value is ignored, same as omitting it.
|
||||
try:
|
||||
n = int(gpu_count) if gpu_count != "" else None
|
||||
except ValueError:
|
||||
n = None
|
||||
if n is not None:
|
||||
if n == 0:
|
||||
# RAM-only mode: rank against system memory, offload allowed.
|
||||
system["has_gpu"] = False
|
||||
@@ -229,6 +247,7 @@ def setup_hwfit_routes():
|
||||
from services.hwfit.hardware import detect_system
|
||||
from services.hwfit.models import get_models
|
||||
from services.hwfit.profiles import compute_serve_profiles
|
||||
host, ssh_port = _validate_detection_target(host, ssh_port)
|
||||
system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
|
||||
if system.get("error"):
|
||||
return {"system": system, "profiles": [], "error": system["error"]}
|
||||
@@ -279,6 +298,7 @@ def setup_hwfit_routes():
|
||||
"""Rank image generation models against detected hardware."""
|
||||
from services.hwfit.hardware import detect_system
|
||||
from services.hwfit.image_models import rank_image_models
|
||||
host, ssh_port = _validate_detection_target(host, ssh_port)
|
||||
system = deepcopy(detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh))
|
||||
if system.get("error"):
|
||||
return {"system": system, "models": [], "error": system["error"]}
|
||||
|
||||
+18
-6
@@ -108,6 +108,12 @@ def _load_disabled_map():
|
||||
db.close()
|
||||
|
||||
|
||||
def _mcp_oauth_redirect_uri() -> str:
    """Return the OAuth callback URL used by both MCP OAuth flows.

    The value is ``REDIRECT_URI`` from ``src.mcp_oauth``; the legacy Google
    flow and the generic MCP flow share it. The import happens at call time
    rather than at module import.
    """
    from src.mcp_oauth import REDIRECT_URI as callback_url
    return callback_url
|
||||
|
||||
|
||||
def setup_mcp_routes(mcp_manager: McpManager):
|
||||
"""Setup MCP routes with the provided manager."""
|
||||
|
||||
@@ -445,9 +451,9 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
client_id = keys["client_id"]
|
||||
scopes = oauth_cfg.get("scopes", [])
|
||||
|
||||
# For Desktop App creds, redirect to localhost — the user will
|
||||
# For Desktop App creds, default to localhost — the user will
|
||||
# paste the resulting URL back if they're on a different device.
|
||||
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
|
||||
redirect_uri = _mcp_oauth_redirect_uri()
|
||||
|
||||
params = {
|
||||
"client_id": client_id,
|
||||
@@ -469,7 +475,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
return RedirectResponse(auth_url)
|
||||
else:
|
||||
# Remote device — show paste-back page
|
||||
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host))
|
||||
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host, redirect_uri))
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -536,7 +542,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
client_id = keys["client_id"]
|
||||
client_secret = keys["client_secret"]
|
||||
|
||||
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
|
||||
redirect_uri = _mcp_oauth_redirect_uri()
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(
|
||||
@@ -603,13 +609,19 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
return router
|
||||
|
||||
|
||||
def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
|
||||
def _oauth_authorize_page(
|
||||
auth_url: str,
|
||||
server_id: str,
|
||||
host: str,
|
||||
redirect_uri: str = "http://localhost:7000/api/mcp/oauth/callback",
|
||||
) -> str:
|
||||
"""Page with Google sign-in link and URL paste-back form for remote access."""
|
||||
# Escape values interpolated into the page: `host` comes from the request
|
||||
# Host header and `server_id` from the OAuth state — neither is trusted.
|
||||
auth_url = html.escape(auth_url, quote=True)
|
||||
server_id = html.escape(server_id, quote=True)
|
||||
host = html.escape(host, quote=True)
|
||||
redirect_uri = html.escape(redirect_uri, quote=True)
|
||||
return f"""<!DOCTYPE html>
|
||||
<html><head>
|
||||
<meta charset="UTF-8"><title>Authorize — Odysseus</title>
|
||||
@@ -654,7 +666,7 @@ def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
|
||||
<div class="divider"></div>
|
||||
<form method="POST" action="http://{host}/api/mcp/oauth/exchange/{server_id}">
|
||||
<p>Paste the URL from your browser after signing in:</p>
|
||||
<input type="text" name="callback_url" placeholder="http://localhost:7000/api/mcp/oauth/callback?code=..." required>
|
||||
<input type="text" name="callback_url" placeholder="{redirect_uri}?code=..." required>
|
||||
<br><button type="submit">Connect</button>
|
||||
</form>
|
||||
</div></body></html>"""
|
||||
|
||||
+75
-45
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
|
||||
from services.memory.memory_extractor import audit_memories
|
||||
from src.auth_helpers import get_current_user, require_user
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.task_endpoint import resolve_task_endpoint
|
||||
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -105,6 +106,13 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
if memory_manager.find_duplicates(text, user_mem):
|
||||
return {"ok": True, "count": len(user_mem), "message": "Memory already exists"}
|
||||
|
||||
if memory_data.session_id:
|
||||
try:
|
||||
session_obj = session_manager.get_session(memory_data.session_id)
|
||||
except KeyError:
|
||||
raise HTTPException(404, "Session not found")
|
||||
_assert_session_owner(session_obj, user)
|
||||
|
||||
new_entry = memory_manager.add_entry(text, memory_data.source, memory_data.category, owner=user)
|
||||
if memory_data.session_id:
|
||||
new_entry["session_id"] = memory_data.session_id
|
||||
@@ -163,8 +171,17 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
|
||||
session_id = memory.get("session_id")
|
||||
if session_id and session_id in session_manager.sessions:
|
||||
session = session_manager.get_session(session_id)
|
||||
memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
|
||||
try:
|
||||
session = session_manager.get_session(session_id)
|
||||
if session:
|
||||
_assert_session_owner(session, user)
|
||||
memory["session_name"] = session.name if session else f"Session {session_id[:6]}"
|
||||
except KeyError:
|
||||
memory["session_name"] = "Unknown"
|
||||
except HTTPException as exc:
|
||||
if exc.status_code != 404:
|
||||
raise
|
||||
memory["session_name"] = "Unknown"
|
||||
else:
|
||||
memory["session_name"] = "Unknown"
|
||||
|
||||
@@ -224,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
}
|
||||
messages = [system_msg] + sess.get_context_messages()
|
||||
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||
)
|
||||
|
||||
try:
|
||||
suggestion_text = await llm_call_async(
|
||||
sess.endpoint_url,
|
||||
sess.model,
|
||||
t_url,
|
||||
t_model,
|
||||
messages,
|
||||
temperature=0.2,
|
||||
max_tokens=500,
|
||||
headers=sess.headers,
|
||||
headers=t_headers,
|
||||
)
|
||||
try:
|
||||
suggestions = json.loads(suggestion_text)
|
||||
@@ -262,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
endpoint_url = model = None
|
||||
headers = {}
|
||||
|
||||
# Try default model from settings first
|
||||
settings = _load_settings()
|
||||
ep_id = settings.get("default_endpoint_id", "")
|
||||
default_model = settings.get("default_model", "")
|
||||
if ep_id:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ep = db.query(ModelEndpoint).filter(
|
||||
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
|
||||
).first()
|
||||
if ep:
|
||||
base = _normalize_base(ep.base_url)
|
||||
endpoint_url = build_chat_url(base)
|
||||
model = default_model
|
||||
if not model and ep.models:
|
||||
try:
|
||||
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
|
||||
if models:
|
||||
model = models[0]
|
||||
except Exception:
|
||||
pass
|
||||
if ep.api_key:
|
||||
headers = {"Authorization": f"Bearer {ep.api_key}"}
|
||||
finally:
|
||||
db.close()
|
||||
# Try utility model from settings first — memory audit is a background
|
||||
# task and should prefer the lighter utility model over the main chat model.
|
||||
from src.task_endpoint import resolve_task_endpoint
|
||||
user = _owner(request)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
|
||||
if t_url and t_model:
|
||||
endpoint_url, model, headers = t_url, t_model, t_headers
|
||||
else:
|
||||
# Fall back to default model if no task/utility model configured
|
||||
settings = _load_settings()
|
||||
ep_id = settings.get("default_endpoint_id", "")
|
||||
default_model = settings.get("default_model", "")
|
||||
if ep_id:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ep = db.query(ModelEndpoint).filter(
|
||||
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
|
||||
).first()
|
||||
if ep:
|
||||
base = _normalize_base(ep.base_url)
|
||||
endpoint_url = build_chat_url(base)
|
||||
model = default_model
|
||||
if not model and ep.models:
|
||||
try:
|
||||
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
|
||||
if models:
|
||||
model = models[0]
|
||||
except Exception:
|
||||
pass
|
||||
if ep.api_key:
|
||||
headers = {"Authorization": f"Bearer {ep.api_key}"}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Fall back to session model if no default configured
|
||||
if not endpoint_url and session:
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
except KeyError:
|
||||
pass
|
||||
# Fall back to session model if no default configured
|
||||
if not endpoint_url and session:
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
if not endpoint_url or not model:
|
||||
raise HTTPException(400, "No default model configured — set one in Settings")
|
||||
@@ -344,13 +373,14 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
endpoint_url, model, headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||
)
|
||||
except KeyError:
|
||||
raise HTTPException(404, "Session not found — needed for LLM config")
|
||||
logger.warning("Session %s not found, falling back to utility endpoint", session)
|
||||
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
|
||||
else:
|
||||
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
|
||||
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
|
||||
|
||||
if not endpoint_url or not model:
|
||||
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
|
||||
|
||||
+90
-12
@@ -123,6 +123,21 @@ def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
|
||||
return cleared_users
|
||||
|
||||
|
||||
def _default_endpoint_needs_assignment(current_default_id: str, enabled_endpoint_ids) -> bool:
|
||||
"""Whether the global default chat endpoint should be (re)assigned.
|
||||
|
||||
True when nothing is configured yet, or the configured default no longer
|
||||
resolves to an enabled endpoint (e.g. the user disabled it). Without the
|
||||
second case, adding a new endpoint after disabling the previous default
|
||||
leaves `default_endpoint_id` pointing at the disabled endpoint, so features
|
||||
that read the raw setting (Memory → Tidy) fail with "No default model
|
||||
configured" even though an enabled endpoint exists. See #3586.
|
||||
"""
|
||||
if not current_default_id:
|
||||
return True
|
||||
return current_default_id not in enabled_endpoint_ids
|
||||
|
||||
|
||||
# Loopback hosts a user might type for a local model server (LM Studio,
|
||||
# llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
|
||||
# host the server actually runs on.
|
||||
@@ -233,6 +248,9 @@ _PROVIDER_CURATED = {
|
||||
"zai-coding": [
|
||||
"glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
|
||||
],
|
||||
"kimi-code": [
|
||||
"kimi-for-coding",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-chat", "deepseek-reasoner",
|
||||
],
|
||||
@@ -283,6 +301,7 @@ _HOST_TO_CURATED = (
|
||||
("fireworks.ai", "fireworks"),
|
||||
("googleapis.com", "google"),
|
||||
("x.ai", "xai"),
|
||||
("nvidia.com", "nvidia"),
|
||||
("openrouter.ai", "openrouter"),
|
||||
("ollama.com", "ollama"),
|
||||
)
|
||||
@@ -299,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
|
||||
parsed = urlparse(base_url)
|
||||
if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
|
||||
return "zai-coding"
|
||||
if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
|
||||
return "kimi-code"
|
||||
for domain, key in _HOST_TO_CURATED:
|
||||
if _host_match(base_url, domain):
|
||||
return key
|
||||
@@ -477,10 +498,17 @@ _NON_CHAT_PREFIXES = (
|
||||
"dall-e", "tts-", "whisper", "text-embedding", "embedding",
|
||||
"davinci", "babbage", "moderation", "omni-moderation",
|
||||
"sora", "gpt-image", "chatgpt-image",
|
||||
# embedding / retrieval / non-chat models (common across providers)
|
||||
"snowflake/arctic-embed", "nvidia/nv-embed", "embed",
|
||||
)
|
||||
_NON_CHAT_CONTAINS = (
|
||||
"-realtime", "-transcribe", "-tts", "-codex",
|
||||
"codex-",
|
||||
"codex-", "content-safety", "-safety", "-reward", "nvclip",
|
||||
"kosmos", "fuyu", "deplot", "vila", "neva",
|
||||
"gliner", "riva", "-parse", "-embedqa", "-nemoretriever",
|
||||
"topic-control", "calibration",
|
||||
"ai-synthetic-video", "cosmos-reason2",
|
||||
"bge", "llama-guard",
|
||||
)
|
||||
_NON_CHAT_EXACT_PREFIXES = (
|
||||
"gpt-audio", # gpt-audio, gpt-audio-mini etc. (not gpt-4o-audio-preview which is chat)
|
||||
@@ -680,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
"""Probe a base URL's /models endpoint and return list of model IDs.
|
||||
For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
|
||||
from src.endpoint_resolver import resolve_url
|
||||
from src.llm_core import httpx_get_kimi_aware
|
||||
base = resolve_url(_normalize_base(base_url))
|
||||
provider = _safe_detect_provider(base)
|
||||
if provider == "chatgpt-subscription":
|
||||
@@ -715,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
url = _safe_build_models_url(base)
|
||||
headers = _safe_build_headers(api_key, base)
|
||||
try:
|
||||
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
# OpenAI format: {"data": [{"id": "model-name"}]}
|
||||
@@ -731,7 +760,12 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
for _e in _PROVIDER_CURATED.get(_ck, []):
|
||||
if _e not in set(models) and not any(m.startswith(_e) for m in models):
|
||||
models.append(_e)
|
||||
return models
|
||||
if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
|
||||
_ck = _match_provider_curated(base, None)
|
||||
for _e in _PROVIDER_CURATED.get(_ck, []):
|
||||
if _e not in set(models) and not any(m.startswith(_e) for m in models):
|
||||
models.append(_e)
|
||||
return [m for m in models if _is_chat_model(m)]
|
||||
except httpx.HTTPStatusError as e:
|
||||
if api_key:
|
||||
status = e.response.status_code if e.response is not None else "unknown"
|
||||
@@ -755,7 +789,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
data = r.json()
|
||||
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
|
||||
if models:
|
||||
return models
|
||||
return [m for m in models if _is_chat_model(m)]
|
||||
except Exception as e:
|
||||
logger.debug(f"Ollama /api/tags probe failed for {base}: {e}")
|
||||
# Fall back to curated list if the provider has a URL-based match (e.g. z.ai has no /models endpoint)
|
||||
@@ -847,15 +881,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
|
||||
|
||||
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
|
||||
"""Return a provider-aware error message for failed endpoint probes."""
|
||||
"""Return a provider-aware error message for failed endpoint probes.
|
||||
|
||||
Surfaces the URL we actually probed and, when the endpoint looks like
|
||||
LM Studio (port 1234 or hostname match), adds a hint about loading a
|
||||
model and confirming the Developer Server is running. The user previously
|
||||
saw a generic "No models found for that provider/key" with no way to
|
||||
tell whether the URL was wrong, the server was down, or the server was
|
||||
reachable but had no model loaded (issue #25).
|
||||
"""
|
||||
ping = ping or {}
|
||||
error = ping.get("error")
|
||||
from src.endpoint_resolver import build_models_url
|
||||
try:
|
||||
probed = build_models_url(base_url) or base_url
|
||||
except Exception:
|
||||
probed = base_url
|
||||
parsed = urlparse(base_url)
|
||||
host = (parsed.hostname or "").lower()
|
||||
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
|
||||
is_lmstudio = (
|
||||
parsed.port == 1234
|
||||
or "lmstudio" in host
|
||||
or "lm-studio" in host
|
||||
or "lm_studio" in host
|
||||
)
|
||||
|
||||
if is_lmstudio:
|
||||
parts = [
|
||||
"LM Studio is reachable, but no models were reported.",
|
||||
f"Probed {probed}.",
|
||||
]
|
||||
if error:
|
||||
parts.append(f"Last probe error: {error}.")
|
||||
parts.append(
|
||||
"Open LM Studio, load at least one model, and confirm the "
|
||||
"Developer Server is running on port 1234."
|
||||
)
|
||||
parts.append(
|
||||
"Base URL should be http://localhost:1234/v1 (native) or "
|
||||
"http://host.docker.internal:1234/v1 (Docker)."
|
||||
)
|
||||
return " ".join(parts)
|
||||
|
||||
if is_ollama:
|
||||
parts = ["No Ollama models found for that endpoint."]
|
||||
parts.append(f"Probed {probed}.")
|
||||
if error:
|
||||
parts.append(f"Last probe error: {error}.")
|
||||
parts.append("Check that Ollama is running and that the base URL is correct.")
|
||||
@@ -865,9 +936,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
|
||||
return " ".join(parts)
|
||||
|
||||
if error:
|
||||
return f"No models found for that provider/key. Last probe error: {error}."
|
||||
return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
|
||||
|
||||
return "No models found for that provider/key."
|
||||
return f"No models found for that provider/key. Probed {probed}."
|
||||
|
||||
|
||||
def _normalize_model_ids(value):
|
||||
@@ -1719,12 +1790,19 @@ def setup_model_routes(model_discovery):
|
||||
)
|
||||
db.add(ep)
|
||||
db.commit()
|
||||
# Auto-set as default chat endpoint if none configured yet. Seed
|
||||
# the first CHAT model (not raw model_ids[0]) so we don't pin the
|
||||
# global default to an embedding/tts/etc. entry a provider happens
|
||||
# to list first.
|
||||
# Auto-set as default chat endpoint when none is usable yet — either
|
||||
# nothing is configured, or the configured default points at an
|
||||
# endpoint that is now missing/disabled (#3586). Seed the first CHAT
|
||||
# model (not raw model_ids[0]) so we don't pin the global default to
|
||||
# an embedding/tts/etc. entry a provider happens to list first.
|
||||
settings = _load_settings()
|
||||
if not settings.get("default_endpoint_id"):
|
||||
enabled_ids = {
|
||||
e.id
|
||||
for e in db.query(ModelEndpoint).filter(
|
||||
ModelEndpoint.is_enabled == True # noqa: E712
|
||||
).all()
|
||||
}
|
||||
if _default_endpoint_needs_assignment(settings.get("default_endpoint_id") or "", enabled_ids):
|
||||
from src.endpoint_resolver import _first_chat_model
|
||||
settings["default_endpoint_id"] = ep.id
|
||||
settings["default_model"] = _first_chat_model(model_ids) or ""
|
||||
|
||||
@@ -160,8 +160,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
JSON response confirming removal
|
||||
"""
|
||||
try:
|
||||
if not directory:
|
||||
raise HTTPException(400, "Directory path is required")
|
||||
# Confine to PERSONAL_DIR — parity with add_directory_to_rag (which
|
||||
# resolves the path the same way). Without this, an arbitrary or
|
||||
# `..`-escaping path is passed straight to
|
||||
# personal_docs_manager.remove_directory / rag.remove_directory.
|
||||
directory = _resolve_allowed_personal_dir(directory)
|
||||
|
||||
logger.info(f"Removing directory from RAG: {directory}")
|
||||
|
||||
|
||||
+14
-10
@@ -11,7 +11,7 @@ from core.session_manager import SessionManager
|
||||
from core.models import ChatMessage
|
||||
from src.request_models import SessionResponse
|
||||
from core.database import Session as DbSession, SessionLocal, Document, GalleryImage, utcnow_naive
|
||||
from src.auth_helpers import get_current_user, effective_user, _auth_disabled
|
||||
from src.auth_helpers import get_current_user, effective_user, _auth_disabled, owner_filter
|
||||
from src.session_actions import is_session_recently_active
|
||||
|
||||
|
||||
@@ -258,7 +258,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
|
||||
last_msg_map = {}
|
||||
mode_map = {}
|
||||
msg_count_map = {}
|
||||
rows = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False, DbSession.owner == user).all()
|
||||
q = db.query(DbSession.id, DbSession.folder, DbSession.total_input_tokens, DbSession.total_output_tokens, DbSession.is_important, DbSession.created_at, DbSession.updated_at, DbSession.last_message_at, DbSession.mode, DbSession.message_count).filter(DbSession.archived == False)
|
||||
q = owner_filter(q, DbSession, user)
|
||||
rows = q.all()
|
||||
for row in rows:
|
||||
folder_map[row.id] = row.folder
|
||||
token_map[row.id] = (row.total_input_tokens or 0) + (row.total_output_tokens or 0)
|
||||
@@ -277,17 +279,19 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
|
||||
# Sessions with active documents that have content
|
||||
from sqlalchemy import func
|
||||
doc_session_ids = set(
|
||||
r[0] for r in db.query(Document.session_id)
|
||||
.filter(Document.is_active == True,
|
||||
Document.current_content != None,
|
||||
func.trim(Document.current_content) != "",
|
||||
Document.owner == user)
|
||||
r[0] for r in owner_filter(
|
||||
db.query(Document.session_id)
|
||||
.filter(Document.is_active == True,
|
||||
Document.current_content != None,
|
||||
func.trim(Document.current_content) != ""),
|
||||
Document, user)
|
||||
.distinct().all()
|
||||
)
|
||||
img_session_ids = set(
|
||||
r[0] for r in db.query(GalleryImage.session_id)
|
||||
.filter(GalleryImage.session_id != None,
|
||||
GalleryImage.owner == user)
|
||||
r[0] for r in owner_filter(
|
||||
db.query(GalleryImage.session_id)
|
||||
.filter(GalleryImage.session_id != None),
|
||||
GalleryImage, user)
|
||||
.distinct().all()
|
||||
)
|
||||
finally:
|
||||
|
||||
+16
-2
@@ -1,6 +1,7 @@
|
||||
"""Shell routes — user-facing command execution endpoint."""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -14,6 +15,7 @@ from collections import namedtuple
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from core.platform_compat import IS_APPLE_SILICON, which_tool
|
||||
from src.optional_deps import prepare_optional_dependency_import
|
||||
|
||||
# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
|
||||
# on Windows, so importing them unconditionally crashed app startup there
|
||||
@@ -149,6 +151,11 @@ def _pip_dist_name(pkg: dict) -> str:
|
||||
return (pkg.get("name") or "").replace("_", "-")
|
||||
|
||||
|
||||
def _import_optional_dependency_for_status(name: str):
    """Import the optional dependency ``name`` and return the module object.

    ``prepare_optional_dependency_import(name)`` is called first, then the
    module is imported with ``importlib.import_module``. Any exception from
    either step propagates to the caller.
    """
    prepare_optional_dependency_import(name)
    module = importlib.import_module(name)
    return module
|
||||
|
||||
|
||||
def _package_installed_from_probe(name: str, probe: dict) -> bool:
|
||||
"""Return whether an optional dependency is usable by Cookbook.
|
||||
|
||||
@@ -970,7 +977,6 @@ def setup_shell_routes() -> APIRouter:
|
||||
"""
|
||||
_require_admin(request)
|
||||
_reject_cross_site(request)
|
||||
import importlib
|
||||
import importlib.metadata as importlib_metadata
|
||||
import shlex
|
||||
import json as _json
|
||||
@@ -1057,6 +1063,13 @@ def setup_shell_routes() -> APIRouter:
|
||||
"category": "Image",
|
||||
"target": "remote",
|
||||
},
|
||||
{
|
||||
"name": "transformers",
|
||||
"pip": "transformers",
|
||||
"desc": "Hugging Face model components used by SD/Flux pipelines and image tools",
|
||||
"category": "Image",
|
||||
"target": "remote",
|
||||
},
|
||||
{
|
||||
"name": "rembg",
|
||||
"pip": "rembg[gpu]",
|
||||
@@ -1202,7 +1215,7 @@ def setup_shell_routes() -> APIRouter:
|
||||
pkg["status_note"] = _package_status_note("vllm", probe)
|
||||
else:
|
||||
try:
|
||||
importlib.import_module(pkg["name"])
|
||||
_import_optional_dependency_for_status(pkg["name"])
|
||||
importlib_metadata.version(_pip_dist_name(pkg))
|
||||
pkg["installed"] = True
|
||||
except ImportError:
|
||||
@@ -1251,6 +1264,7 @@ def setup_shell_routes() -> APIRouter:
|
||||
"sglang[all]",
|
||||
"diffusers",
|
||||
"diffusers[torch]",
|
||||
"transformers",
|
||||
"TTS",
|
||||
"bark",
|
||||
"faster-whisper",
|
||||
|
||||
@@ -198,6 +198,8 @@ def setup_webhook_routes(
|
||||
"opencode-go": "https://opencode.ai/zen/go/v1",
|
||||
"fireworks": "https://api.fireworks.ai/inference/v1",
|
||||
"venice": "https://api.venice.ai/api/v1",
|
||||
"kimi-code": "https://api.kimi.com/coding/v1",
|
||||
"kimicode": "https://api.kimi.com/coding/v1",
|
||||
}
|
||||
|
||||
# Model prefix → provider mapping for auto-detection
|
||||
@@ -210,6 +212,8 @@ def setup_webhook_routes(
|
||||
"mistral": "mistral",
|
||||
"llama": "groq",
|
||||
"mixtral": "groq",
|
||||
"kimi-for-coding": "kimi-code",
|
||||
"kimi": "kimi-code",
|
||||
}
|
||||
|
||||
def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]:
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Workspace API - browse server directories to pick a tool workspace folder."""
|
||||
import os
|
||||
from fastapi import APIRouter, Request, HTTPException, Query
|
||||
|
||||
from src.auth_helpers import get_current_user
|
||||
from src.tool_security import owner_is_admin_or_single_user
|
||||
|
||||
# Cap entries returned per directory (mirrors filesystem_tools._CODENAV_MAX_HITS).
|
||||
# A huge directory shouldn't dump thousands of rows into the picker; the user can
|
||||
# type/paste a path to jump straight in instead.
|
||||
_MAX_BROWSE_DIRS = 500
|
||||
|
||||
|
||||
def setup_workspace_routes():
    """Create and return the ``/api/workspace`` router.

    Registers two GET endpoints on the router: ``/browse`` (list the
    sub-directories of a server path) and ``/vet`` (validate a candidate
    workspace path). Both reject callers for whom
    ``owner_is_admin_or_single_user`` is false with HTTP 403.
    """
    router = APIRouter(prefix="/api/workspace", tags=["workspace"])

    @router.get("/browse")
    def browse(request: Request, path: str = Query(default="")):
        """List subdirectories of `path` (default: home) so the UI can navigate
        the server filesystem and pick a workspace folder. Directories only.

        ADMIN-ONLY: this enumerates the server filesystem, so it is gated the
        same way the file/shell tools are (read_file/write_file/bash are in
        NON_ADMIN_BLOCKED_TOOLS). A non-admin who can't use those tools must not
        be able to map the host's directory tree either.
        """
        owner = get_current_user(request)
        if not owner_is_admin_or_single_user(owner):
            raise HTTPException(status_code=403, detail="Workspace browsing is admin-only")

        # Canonicalise (symlinks resolved, "~" expanded); anything that is not
        # an existing directory falls back to the home directory.
        requested = path.strip() or "~"
        target = os.path.realpath(os.path.expanduser(requested))
        if not os.path.isdir(target):
            target = os.path.realpath(os.path.expanduser("~"))

        children = []
        try:
            with os.scandir(target) as scanner:
                for entry in scanner:
                    try:
                        # Symlinked directories are not followed and hidden
                        # (dot-prefixed) names are left out.
                        if entry.name.startswith(".") or not entry.is_dir(follow_symlinks=False):
                            continue
                        # os.path.join keeps the child path correct for the
                        # host OS separator.
                        children.append({"name": entry.name, "path": os.path.join(target, entry.name)})
                    except OSError:
                        continue
        except OSError:
            # Unreadable directory: report it as empty rather than failing.
            children = []

        children.sort(key=lambda item: item["name"].lower())
        parent = os.path.dirname(target)
        has_parent = bool(parent) and parent != target
        from src.tool_execution import vet_workspace
        return {
            "path": target,
            "parent": parent if has_parent else None,
            "dirs": children[:_MAX_BROWSE_DIRS],
            "truncated": len(children) > _MAX_BROWSE_DIRS,
            # Whether this directory may be bound as a workspace (filesystem
            # roots and sensitive dirs may be browsed through but not chosen).
            "selectable": vet_workspace(target) is not None,
        }

    @router.get("/vet")
    def vet(request: Request, path: str = Query(default="")):
        """Validate a workspace path without binding it.

        The UI calls this before persisting a manually typed path (/workspace
        set) so a typo, file path, deleted folder, sensitive dir, or filesystem
        root is rejected up front with the canonical path returned on success,
        instead of being stored client-side and silently dropped at chat time.
        Admin-gated like /browse: it confirms path existence on the host.
        """
        owner = get_current_user(request)
        if not owner_is_admin_or_single_user(owner):
            raise HTTPException(status_code=403, detail="Workspace selection is admin-only")
        from src.tool_execution import vet_workspace
        canonical = vet_workspace(path)
        return {"ok": canonical is not None, "path": canonical}

    return router
|
||||
Executable
+635
@@ -0,0 +1,635 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build a neutral agent migration manifest.
|
||||
|
||||
This helper is intentionally read-only. It does not import the Odysseus
|
||||
application package, write to data/, call an LLM, or apply anything. It turns
|
||||
common agent export shapes into a portable JSON manifest that Odysseus can
|
||||
preview or import later.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import mimetypes
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
# Schema identifier written into every manifest under "schema_version".
SCHEMA_VERSION = "agent-migration.v1"

# Lower-case file suffixes that looks_textual() accepts without consulting
# the mimetypes database.
TEXT_EXTENSIONS = {
    ".cfg", ".conf", ".csv", ".json", ".log",
    ".md", ".markdown", ".py", ".rst",
    ".toml", ".txt", ".yaml", ".yml",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class InputWarning:
    """Immutable record of a non-fatal problem found while reading an input."""

    # String form of the file or directory the warning is about.
    path: str
    # Human-readable description of what was skipped or went wrong.
    message: str
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (whole seconds)."""
    now = datetime.now(timezone.utc)
    return now.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
|
||||
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of *text* encoded as UTF-8."""
    encoded = text.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
|
||||
|
||||
|
||||
def sha256_bytes(data: bytes) -> str:
    """Return the hex SHA-256 digest of the raw bytes *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def sha256_path(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at *path*.

    The file is read in 64 KiB blocks so large files are never held in
    memory whole. Errors from opening or reading propagate to the caller.
    """
    digest = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
||||
|
||||
|
||||
def stable_id(kind: str, source_name: str, *parts: Any) -> str:
    """Return a deterministic id of the form ``<kind>:<16 hex chars>``.

    The hex part is the first 16 characters of the SHA-256 of *kind*,
    *source_name* and the ``str()`` of every extra part, joined with the
    unit-separator character (``\\x1f``).
    """
    fields = [kind, source_name]
    fields.extend(str(part) for part in parts)
    fingerprint = hashlib.sha256("\x1f".join(fields).encode("utf-8")).hexdigest()
    return f"{kind}:{fingerprint[:16]}"
|
||||
|
||||
|
||||
def read_json(path: Path) -> Any:
    """Parse the UTF-8 JSON document stored at *path* and return the result.

    I/O, decoding and JSON syntax errors propagate to the caller.
    """
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
|
||||
|
||||
|
||||
def normalize_category(value: Any) -> str:
    """Return *value* as a trimmed, lower-cased category name.

    Falsy input, and input that is empty after trimming, yields ``"fact"``.
    """
    raw = value if value else "fact"
    cleaned = str(raw).strip().lower()
    return cleaned if cleaned else "fact"
|
||||
|
||||
|
||||
def normalize_memory_text(item: Any) -> str:
    """Extract the trimmed text of one memory entry.

    A string is returned trimmed. For a dict, the first of ``text``,
    ``content``, ``memory``, ``value`` that holds a non-blank string wins.
    Anything else yields ``""``.
    """
    if isinstance(item, str):
        return item.strip()
    if not isinstance(item, dict):
        return ""
    candidates = (item.get(key) for key in ("text", "content", "memory", "value"))
    for candidate in candidates:
        if isinstance(candidate, str) and candidate.strip():
            return candidate.strip()
    return ""
|
||||
|
||||
|
||||
def memory_metadata(item: Any, source_path: Path, index: int) -> dict[str, Any]:
    """Build the metadata dict for one memory entry.

    Always records ``source_path`` (as a string) and ``source_index``. When
    *item* is a dict, each of ``id``, ``timestamp``, ``created_at``,
    ``updated_at``, ``source``, ``tags`` and ``pinned`` that is present is
    copied across under a ``source_`` prefix.
    """
    metadata: dict[str, Any] = {"source_path": str(source_path), "source_index": index}
    if not isinstance(item, dict):
        return metadata
    carried = ("id", "timestamp", "created_at", "updated_at", "source", "tags", "pinned")
    metadata.update({f"source_{key}": item.get(key) for key in carried if key in item})
    return metadata
|
||||
|
||||
|
||||
def payload_items(payload: Any, keys: tuple[str, ...]) -> Any:
    """Unwrap a list stored under one of *keys* in a dict payload.

    Returns the first list found, trying *keys* in order. A payload that is
    not a dict, or has no list under any key, is returned unchanged.
    """
    if not isinstance(payload, dict):
        return payload
    for name in keys:
        candidate = payload.get(name)
        if isinstance(candidate, list):
            return candidate
    return payload
|
||||
|
||||
|
||||
def collect_memory_json(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Read a JSON memory export and return ``(memory items, warnings)``.

    The file may hold a list, or an object with a list under ``memories``,
    ``memory``, ``items`` or ``data``. Entries without text, and entries whose
    trimmed lower-cased text repeats an earlier entry, are skipped with a
    warning. A file that cannot be read or has the wrong shape yields no
    items and a single warning.
    """
    location = str(path)
    try:
        payload = read_json(path)
    except Exception as exc:
        return [], [InputWarning(location, f"could not read JSON: {exc}")]

    entries = payload_items(payload, ("memories", "memory", "items", "data"))
    if not isinstance(entries, list):
        return [], [InputWarning(location, "expected a JSON list or an object containing a memory list")]

    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []
    seen_digests: set[str] = set()
    for index, entry in enumerate(entries):
        text = normalize_memory_text(entry)
        if not text:
            warnings.append(InputWarning(location, f"skipped memory at index {index}: missing text"))
            continue

        # De-duplicate case-insensitively on the trimmed text.
        digest = sha256_text(text.strip().lower())
        if digest in seen_digests:
            warnings.append(InputWarning(location, f"skipped duplicate memory at index {index}"))
            continue
        seen_digests.add(digest)

        if isinstance(entry, dict):
            category = normalize_category(entry.get("category"))
            source = str(entry.get("source") or source_name)
        else:
            category = normalize_category("fact")
            source = source_name

        items.append(
            {
                "id": stable_id("memory", source_name, path, index, digest),
                "kind": "memory",
                "text": text,
                "category": category,
                "source": source,
                "metadata": memory_metadata(entry, path, index),
            }
        )
    return items, warnings
|
||||
|
||||
|
||||
def normalize_timestamp(value: Any) -> str | None:
    """Normalise a timestamp value to a string, or ``None`` when absent.

    ``None`` and ``""`` give ``None``. Numbers are read as Unix epoch seconds
    and rendered as UTC ``YYYY-MM-DDTHH:MM:SSZ``; a number that cannot be
    converted falls back to ``str(value)``. Every other value is returned as
    ``str(value)`` without parsing.
    """
    if value is None or value == "":
        return None
    if not isinstance(value, (int, float)):
        return str(value)
    try:
        moment = datetime.fromtimestamp(float(value), timezone.utc)
    except (OverflowError, OSError, ValueError):
        # Out-of-range or non-finite epoch value: keep it as written.
        return str(value)
    return moment.replace(microsecond=0).isoformat().replace("+00:00", "Z")
|
||||
|
||||
|
||||
def normalize_role(value: Any) -> str:
    """Map a speaker label onto a normalised, lower-case role name.

    ``human``/``user`` become ``"user"``; ``assistant``/``ai``/``bot``/
    ``model`` become ``"assistant"``. Any other non-blank label is returned
    trimmed and lower-cased; a falsy or blank label gives ``"unknown"``.
    """
    aliases = {
        "human": "user",
        "user": "user",
        "assistant": "assistant",
        "ai": "assistant",
        "bot": "assistant",
        "model": "assistant",
    }
    role = str(value or "unknown").strip().lower()
    if not role:
        return "unknown"
    return aliases.get(role, role)
|
||||
|
||||
|
||||
def content_part_text(part: Any) -> str:
    """Return the text carried by one message content part.

    A string part is returned as-is. For a dict part, the first of ``text``,
    ``content``, ``value`` that holds a string is returned (it may be empty).
    Every other shape yields ``""``.
    """
    if isinstance(part, str):
        return part
    if isinstance(part, dict):
        # A typed part such as {"type": "text", "text": "..."} is already
        # covered by the "text" lookup, so it needs no separate branch.
        for key in ("text", "content", "value"):
            value = part.get(key)
            if isinstance(value, str):
                return value
    return ""
|
||||
|
||||
|
||||
def normalize_message_text(message: dict[str, Any]) -> str:
    """Return the plain text of a message dict, or ``""`` when none is found.

    ``content`` is tried first: a string is returned unchanged, a list (or a
    dict's ``parts`` list) is reduced to its non-blank part texts joined with
    newlines, and a dict is searched for a string under ``text``, ``content``
    or ``value``. Failing that, the message's own ``text``, ``body`` or
    ``message`` string is used.
    """

    def joined(parts: list) -> str:
        stripped = (content_part_text(part).strip() for part in parts)
        return "\n".join(piece for piece in stripped if piece)

    content = message.get("content")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return joined(content)
    if isinstance(content, dict):
        parts = content.get("parts")
        if isinstance(parts, list):
            return joined(parts)
        for key in ("text", "content", "value"):
            candidate = content.get(key)
            if isinstance(candidate, str):
                return candidate

    for key in ("text", "body", "message"):
        candidate = message.get(key)
        if isinstance(candidate, str):
            return candidate
    return ""
|
||||
|
||||
|
||||
def normalize_message(message: dict[str, Any]) -> dict[str, Any] | None:
    """Convert one raw message dict into the manifest's message shape.

    Returns ``None`` when the message has no text after trimming. Otherwise
    returns a dict with ``role`` and ``text``, plus ``created_at`` when a
    timestamp is found and ``source_id`` when the message has an ``id``.
    """
    text = normalize_message_text(message).strip()
    if not text:
        return None

    author = message.get("author")
    if not isinstance(author, dict):
        author = {}
    # First truthy label wins: role, sender, speaker, then the author fields.
    role = message.get("role") or message.get("sender") or message.get("speaker")
    if not role:
        role = author.get("role") or author.get("name")

    normalized: dict[str, Any] = {"role": normalize_role(role), "text": text}

    raw_timestamp = message.get("created_at") or message.get("create_time") or message.get("timestamp")
    created_at = normalize_timestamp(raw_timestamp)
    if created_at:
        normalized["created_at"] = created_at

    source_id = message.get("id")
    if source_id is not None:
        normalized["source_id"] = str(source_id)
    return normalized
|
||||
|
||||
|
||||
def chatgpt_mapping_messages(conversation: dict[str, Any]) -> list[dict[str, Any]]:
    """Extract normalised messages from a ChatGPT-style ``mapping`` object.

    Each value in ``conversation["mapping"]`` that is a dict holding a dict
    ``message`` is normalised with ``normalize_message``; messages without
    text are dropped. The result is ordered by the message ``create_time``,
    with the node's position in the mapping as tie-breaker. Returns ``[]``
    when ``mapping`` is missing or is not a dict.
    """
    mapping = conversation.get("mapping")
    if not isinstance(mapping, dict):
        return []
    rows: list[tuple[float, int, dict[str, Any]]] = []
    for index, node in enumerate(mapping.values()):
        if not isinstance(node, dict) or not isinstance(node.get("message"), dict):
            continue
        message = node["message"]
        try:
            sort_key = float(message.get("create_time"))
        except (TypeError, ValueError, OverflowError):
            # Missing or unparsable time, or an integer too large for a
            # float: fall back to the node's position in the mapping.
            sort_key = float(index)
        normalized = normalize_message(message)
        if normalized:
            rows.append((sort_key, index, normalized))
    rows.sort(key=lambda row: (row[0], row[1]))
    return [row[2] for row in rows]
|
||||
|
||||
|
||||
def conversation_messages(conversation: dict[str, Any]) -> tuple[list[dict[str, Any]], str]:
    """Return ``(messages, format hint)`` for one conversation object.

    A ChatGPT-style ``mapping`` that yields messages wins and is reported as
    ``"chatgpt_mapping"``. Otherwise the first of ``messages``,
    ``chat_messages``, ``turns`` that holds a list is used, even if it yields
    no messages, and its key name is the hint. With neither, the result is
    ``([], "unknown")``.
    """
    mapped = chatgpt_mapping_messages(conversation)
    if mapped:
        return mapped, "chatgpt_mapping"

    for key in ("messages", "chat_messages", "turns"):
        raw_messages = conversation.get(key)
        if not isinstance(raw_messages, list):
            continue
        messages = []
        for raw in raw_messages:
            if not isinstance(raw, dict):
                continue
            normalized = normalize_message(raw)
            if normalized:
                messages.append(normalized)
        return messages, key
    return [], "unknown"
|
||||
|
||||
|
||||
def conversation_title(conversation: dict[str, Any], index: int) -> str:
    """Return a display title for a conversation.

    The first non-blank string among ``title``, ``name`` and ``summary`` is
    returned trimmed; otherwise ``"Conversation <index + 1>"``.
    """
    candidates = (conversation.get(key) for key in ("title", "name", "summary"))
    return next(
        (value.strip() for value in candidates if isinstance(value, str) and value.strip()),
        f"Conversation {index + 1}",
    )
|
||||
|
||||
|
||||
def collect_conversation_json(
    path: Path,
    source_name: str,
    *,
    include_content: bool = False,
    max_messages: int = 2000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Read a conversation export and return ``(thread items, warnings)``.

    The file may hold a list of conversations, a single conversation object,
    or an object with a list under ``conversations``, ``conversation``,
    ``items`` or ``data``. Each conversation with text messages becomes one
    ``conversation_thread`` item carrying metadata only. Messages are embedded
    when *include_content* is true and the thread has at most *max_messages*
    messages; a longer thread keeps its metadata and adds a warning.
    """
    location = str(path)
    try:
        payload = read_json(path)
    except Exception as exc:
        return [], [InputWarning(location, f"could not read JSON: {exc}")]

    conversations = payload_items(payload, ("conversations", "conversation", "items", "data"))
    if isinstance(conversations, dict):
        # A lone conversation object is treated as a one-element export.
        conversations = [conversations]
    if not isinstance(conversations, list):
        return [], [InputWarning(location, "expected a JSON list or an object containing a conversation list")]

    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []
    for index, conversation in enumerate(conversations):
        if not isinstance(conversation, dict):
            warnings.append(InputWarning(location, f"skipped conversation at index {index}: expected object"))
            continue
        messages, format_hint = conversation_messages(conversation)
        if not messages:
            warnings.append(InputWarning(location, f"skipped conversation at index {index}: no text messages found"))
            continue

        source_id = conversation.get("id") or conversation.get("uuid") or conversation.get("conversation_id")
        transcript = "\n".join(f"{msg['role']}:{msg['text']}" for msg in messages)
        text_digest = sha256_text(transcript)

        metadata: dict[str, Any] = {
            "source_path": location,
            "source_index": index,
            "source_format": format_hint,
            "message_count": len(messages),
            "text_sha256": text_digest,
            "content_included": False,
        }
        if source_id is not None:
            metadata["source_id"] = str(source_id)
        for key in ("create_time", "created_at", "update_time", "updated_at"):
            stamp = normalize_timestamp(conversation.get(key))
            if stamp:
                metadata[f"source_{key}"] = stamp

        item: dict[str, Any] = {
            "id": stable_id("conversation", source_name, path, source_id or index, text_digest),
            "kind": "conversation_thread",
            "title": conversation_title(conversation, index),
            "source": source_name,
            "metadata": metadata,
        }

        if include_content and len(messages) > max_messages:
            warnings.append(
                InputWarning(
                    location,
                    f"skipped conversation content at index {index}: over {max_messages} messages",
                )
            )
        elif include_content:
            item["messages"] = messages
            metadata["content_included"] = True
        items.append(item)
    return items, warnings
|
||||
|
||||
|
||||
def parse_skill_frontmatter(text: str) -> dict[str, Any]:
    """Parse the leading ``---`` front-matter block of a SKILL.md document.

    Only flat ``key: value`` lines are understood. Blank lines, ``#`` comment
    lines, lines without a colon and lines with an empty key are ignored.
    Values are trimmed and stripped of surrounding double and single quotes.
    Returns ``{}`` when the text does not start with ``---`` or the closing
    ``---`` line is missing.
    """
    if not text.startswith("---"):
        return {}
    closing = text.find("\n---", 3)
    if closing < 0:
        return {}

    parsed: dict[str, Any] = {}
    for raw_line in text[3:closing].strip().splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        key, separator, value = raw_line.partition(":")
        if not separator:
            continue
        key = key.strip()
        if key:
            parsed[key] = value.strip().strip('"').strip("'")
    return parsed
|
||||
|
||||
|
||||
def collect_skill_dir(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Collect every ``SKILL.md`` under *path* and return ``(items, warnings)``.

    *path* must be a real directory: a symlink, a missing path or a
    non-directory yields no items and one warning. The tree is searched
    recursively in sorted order. Symlinked or unreadable skill files are
    skipped with a warning. The skill name comes from the front-matter
    ``name``, falling back to the parent directory name.
    """
    location = str(path)
    if path.is_symlink():
        return [], [InputWarning(location, "skills path is a symlink; skipped")]
    if not path.exists():
        return [], [InputWarning(location, "skills directory does not exist")]
    if not path.is_dir():
        return [], [InputWarning(location, "skills path is not a directory")]

    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []
    for skill_path in sorted(path.rglob("SKILL.md")):
        skill_location = str(skill_path)
        if skill_path.is_symlink():
            warnings.append(InputWarning(skill_location, "skipped symlinked skill file"))
            continue
        try:
            text = skill_path.read_text(encoding="utf-8")
        except Exception as exc:
            warnings.append(InputWarning(skill_location, f"could not read skill: {exc}"))
            continue

        frontmatter = parse_skill_frontmatter(text)
        folder_name = skill_path.parent.name
        name = str(frontmatter.get("name") or folder_name).strip() or folder_name
        digest = sha256_text(text)
        items.append(
            {
                "id": stable_id("skill", source_name, skill_path, digest),
                "kind": "skill",
                "name": name,
                "category": str(frontmatter.get("category") or "general"),
                "source": source_name,
                "format": "SKILL.md",
                "content": text,
                "metadata": {
                    "source_path": skill_location,
                    "sha256": digest,
                    "frontmatter": frontmatter,
                },
            }
        )
    return items, warnings
|
||||
|
||||
|
||||
def looks_textual(path: Path) -> bool:
    """Return whether *path* looks like a text file, judged by name only.

    True when the lower-cased suffix is in ``TEXT_EXTENSIONS``, or when
    ``mimetypes`` guesses a ``text/*`` type or ``application/json``. The file
    is never opened.
    """
    if path.suffix.lower() in TEXT_EXTENSIONS:
        return True
    mime_type = mimetypes.guess_type(str(path))[0]
    if not mime_type:
        return False
    return mime_type.startswith("text/") or mime_type == "application/json"
|
||||
|
||||
|
||||
def iter_archive_dir(path: Path) -> Iterable[Path | InputWarning]:
    """Yield the regular files under directory *path*, depth-first.

    Children are visited in sorted order and sub-directories are descended
    into where they occur. A symlinked child is not followed and is reported
    as an ``InputWarning``; so is a directory that cannot be listed.
    """
    try:
        children = sorted(path.iterdir())
    except Exception as exc:
        yield InputWarning(str(path), f"could not scan archive directory: {exc}")
        return

    for child in children:
        if child.is_symlink():
            yield InputWarning(str(child), "skipped symlinked archive path")
        elif child.is_file():
            yield child
        elif child.is_dir():
            for nested in iter_archive_dir(child):
                yield nested
|
||||
|
||||
|
||||
def iter_archive_files(paths: Iterable[Path]) -> Iterable[Path | InputWarning]:
    """Yield the regular files reachable from each of *paths*, in input order.

    A file path is yielded as-is and a directory is expanded with
    ``iter_archive_dir``. A symlink yields an ``InputWarning``. A path that
    is neither a file nor a directory yields nothing.
    """
    for candidate in paths:
        if candidate.is_symlink():
            yield InputWarning(str(candidate), "skipped symlinked archive path")
        elif candidate.is_file():
            yield candidate
        elif candidate.is_dir():
            for nested in iter_archive_dir(candidate):
                yield nested
|
||||
|
||||
|
||||
def collect_archive_paths(
    paths: list[Path],
    source_name: str,
    *,
    include_content: bool = False,
    max_bytes: int = 256_000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Turn archive files and directories into ``archive_document`` items.

    Top-level *paths* that are symlinks, missing, or neither file nor
    directory are skipped with a warning. Each remaining text-looking file
    becomes an item carrying its path, size and SHA-256. With
    *include_content*, files of at most *max_bytes* bytes also get their text
    embedded, decoded as UTF-8 and falling back to replacement characters
    (flagged ``decoded_with_replacement``). A file that is too large, or
    whose content cannot be read, keeps its metadata-only item and adds a
    warning.

    Returns ``(items, warnings)``. Per-file I/O failures are reported as
    warnings and never raised.
    """
    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []
    existing_paths: list[Path] = []
    for path in paths:
        if path.is_symlink():
            warnings.append(InputWarning(str(path), "archive path is a symlink; skipped"))
            continue
        if not path.exists():
            warnings.append(InputWarning(str(path), "archive path does not exist"))
            continue
        if not path.is_file() and not path.is_dir():
            warnings.append(InputWarning(str(path), "archive path is not a file or directory"))
            continue
        existing_paths.append(path)

    for entry in iter_archive_files(existing_paths):
        if isinstance(entry, InputWarning):
            warnings.append(entry)
            continue
        path = entry
        if not looks_textual(path):
            warnings.append(InputWarning(str(path), "skipped non-text archive file"))
            continue
        try:
            size = path.stat().st_size
        except Exception as exc:
            warnings.append(InputWarning(str(path), f"could not stat archive file: {exc}"))
            continue
        try:
            file_hash = sha256_path(path)
        except Exception as exc:
            warnings.append(InputWarning(str(path), f"could not hash archive file: {exc}"))
            continue

        archive_item: dict[str, Any] = {
            "id": stable_id("archive", source_name, path, file_hash),
            "kind": "archive_document",
            "title": path.name,
            "source": source_name,
            "metadata": {
                "source_path": str(path),
                "size_bytes": size,
                "sha256": file_hash,
            },
        }

        if include_content and size > max_bytes:
            warnings.append(InputWarning(str(path), f"skipped archive content over {max_bytes} bytes"))
        elif include_content:
            try:
                try:
                    content = path.read_text(encoding="utf-8")
                    replaced = False
                except UnicodeDecodeError:
                    content = path.read_text(encoding="utf-8", errors="replace")
                    replaced = True
            except OSError as exc:
                # The file vanished or became unreadable after it was hashed:
                # keep the metadata-only item instead of aborting the run.
                warnings.append(InputWarning(str(path), f"could not read archive content: {exc}"))
            else:
                archive_item["content"] = content
                if replaced:
                    archive_item["metadata"]["decoded_with_replacement"] = True
        items.append(archive_item)
    return items, warnings
|
||||
|
||||
|
||||
def build_manifest(args) -> dict[str, Any]:
    """Assemble the migration manifest dict from parsed command-line *args*.

    Inputs are processed in a fixed order: memory JSON files, skill
    directories, conversation JSON files, then archive paths. Items and
    warnings are accumulated in that order. The result carries the schema
    version, a generation timestamp, the source name and kind, summary
    counts, the items, and the warnings as ``{"path", "message"}`` dicts.
    """
    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []

    def absorb(result: tuple[list[dict[str, Any]], list[InputWarning]]) -> None:
        collected, got_warnings = result
        items.extend(collected)
        warnings.extend(got_warnings)

    for memory_path in args.memory_json:
        absorb(collect_memory_json(memory_path, args.source_name))

    for skills_path in args.skills_dir:
        absorb(collect_skill_dir(skills_path, args.source_name))

    for conversation_path in args.conversation_json:
        absorb(
            collect_conversation_json(
                conversation_path,
                args.source_name,
                include_content=args.include_conversation_content,
                max_messages=args.max_conversation_messages,
            )
        )

    if args.archive:
        absorb(
            collect_archive_paths(
                args.archive,
                args.source_name,
                include_content=args.include_archive_content,
                max_bytes=args.max_archive_bytes,
            )
        )

    counts: dict[str, int] = {}
    for item in items:
        kind = item["kind"]
        counts[kind] = counts.get(kind, 0) + 1

    summary = {
        "item_count": len(items),
        "counts_by_kind": counts,
        "warning_count": len(warnings),
    }
    return {
        "schema_version": SCHEMA_VERSION,
        "generated_at": utc_now_iso(),
        "source": {
            "name": args.source_name,
            "kind": args.source_kind,
        },
        "summary": summary,
        "items": items,
        "warnings": [{"path": warning.path, "message": warning.message} for warning in warnings],
    }
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None):
    """Parse the manifest builder's command-line options.

    *argv* is the argument list without the program name; ``None`` makes
    argparse read ``sys.argv``. Returns the populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="Build a neutral Odysseus agent migration manifest.")
    add = parser.add_argument

    add("--source-name", default="agent-export", help="Human-readable source name.")
    add("--source-kind", default="generic", help="Source adapter kind, e.g. generic, openclaw, hermes.")

    # Repeatable path options: each occurrence appends one Path to a list.
    repeatable_paths = (
        ("--memory-json", "JSON memory export. May be a list, or an object containing memories/items/data."),
        ("--skills-dir", "Directory containing SKILL.md files. Scanned recursively."),
        ("--archive", "Text/Markdown/JSON file or directory to preserve as archive documents."),
        (
            "--conversation-json",
            "Conversation export JSON. Supports generic message lists and ChatGPT-style conversations.json.",
        ),
    )
    for flag, help_text in repeatable_paths:
        add(flag, action="append", type=Path, default=[], help=help_text)

    add(
        "--include-archive-content",
        action="store_true",
        help="Embed archive document content in the manifest. By default only metadata is included.",
    )
    add(
        "--max-archive-bytes",
        type=int,
        default=256_000,
        help="Maximum bytes to embed per archive file when --include-archive-content is used.",
    )
    add(
        "--include-conversation-content",
        action="store_true",
        help="Embed normalized conversation messages. By default only thread metadata is included.",
    )
    add(
        "--max-conversation-messages",
        type=int,
        default=2000,
        help="Maximum messages to embed per conversation when --include-conversation-content is used.",
    )
    add("--output", type=Path, help="Write manifest JSON to this path instead of stdout.")
    add("--compact", action="store_true", help="Write compact JSON without indentation.")
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Build the manifest and write it out; return the process exit code.

    The JSON is written with sorted keys and non-ASCII characters preserved,
    either compact or indented by two spaces with a trailing newline. It goes
    to ``--output`` (parent directories are created) or to stdout. Always
    returns ``0``.
    """
    args = parse_args(argv)
    manifest = build_manifest(args)

    if args.compact:
        text = json.dumps(manifest, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    else:
        text = json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n"

    if not args.output:
        sys.stdout.write(text)
        return 0

    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(text, encoding="utf-8")
    return 0


if __name__ == "__main__":
    # Exit with main()'s return code when run as a script.
    raise SystemExit(main())
|
||||
@@ -611,6 +611,93 @@ def _cache_key(host: str, ssh_port: str, platform_name: str):
|
||||
)
|
||||
|
||||
|
||||
def _is_containerized():
    """Best-effort check for whether the local Odysseus process is running in a container.

    Returns ``False`` whenever a remote host is being probed. Otherwise the
    presence of ``/.dockerenv`` or a ``docker``/``containerd``/``kubepods``
    marker in ``/proc/1/cgroup`` counts as containerised. Any failure to read
    the cgroup file is treated as "not containerised".
    """
    if _remote_host:
        return False
    if os.path.exists("/.dockerenv"):
        return True

    try:
        with open("/proc/1/cgroup", encoding="utf-8", errors="replace") as handle:
            cgroup_text = handle.read().lower()
    except Exception:
        return False

    for marker in ("docker", "containerd", "kubepods"):
        if marker in cgroup_text:
            return True
    return False
|
||||
|
||||
|
||||
def _hardware_visibility_warning(result):
|
||||
"""Return a non-blocking UX warning when detected hardware may only be container-visible."""
|
||||
if not isinstance(result, dict):
|
||||
return None
|
||||
|
||||
if result.get("manual_hardware"):
|
||||
return None
|
||||
|
||||
if not result.get("containerized"):
|
||||
return None
|
||||
|
||||
if result.get("gpu_error"):
|
||||
return None
|
||||
|
||||
if not result.get("has_gpu"):
|
||||
return {
|
||||
"code": "container_no_gpu_visible",
|
||||
"severity": "warning",
|
||||
"title": "No GPU visible inside Docker",
|
||||
"message": (
|
||||
"Cookbook is scanning hardware from inside the Odysseus container. "
|
||||
"If your host has a GPU, Docker may not be exposing it to the container, "
|
||||
"so model recommendations may be CPU-only or too conservative."
|
||||
),
|
||||
"actions": [
|
||||
"manual_hardware",
|
||||
"rescan",
|
||||
"copy_diagnostics",
|
||||
],
|
||||
}
|
||||
|
||||
total_ram = result.get("total_ram_gb") or 0
|
||||
if total_ram and total_ram <= 8:
|
||||
return {
|
||||
"code": "container_low_ram_visible",
|
||||
"severity": "info",
|
||||
"title": "Container-visible RAM may be lower than host RAM",
|
||||
"message": (
|
||||
"Cookbook is seeing the RAM available inside the container. "
|
||||
"If your host has more memory, validate host RAM separately or use Manual Hardware."
|
||||
),
|
||||
"actions": [
|
||||
"manual_hardware",
|
||||
"rescan",
|
||||
"copy_diagnostics",
|
||||
],
|
||||
}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _attach_probe_context(result, host=""):
    """Attach probe-scope metadata and optional hardware visibility warning.

    A *result* that is not a dict, or that carries an ``error``, is returned
    untouched. Otherwise the dict is updated in place: ``probe_scope`` is
    ``"remote"`` when *host* is given, else ``"container"`` or ``"native"``;
    ``containerized`` records the container check; and
    ``hardware_visibility_warning`` is set, or removed when no warning
    applies. The same dict is returned.
    """
    if not isinstance(result, dict) or result.get("error"):
        return result

    if host:
        # Remote probes are never treated as containerised.
        containerized = False
        scope = "remote"
    else:
        containerized = _is_containerized()
        scope = "container" if containerized else "native"

    result["probe_scope"] = scope
    result["containerized"] = containerized

    warning = _hardware_visibility_warning(result)
    if not warning:
        result.pop("hardware_visibility_warning", None)
    else:
        result["hardware_visibility_warning"] = warning
    return result
|
||||
|
||||
|
||||
def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
"""Detect system hardware: RAM, CPU, GPU. Cached per host (hardware rarely
|
||||
changes, and probing a remote host over SSH is slow). Pass fresh=True to
|
||||
@@ -635,6 +722,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
if _remote_platform == "windows" and _remote_host:
|
||||
result = _detect_windows()
|
||||
if result:
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_remote_host = None
|
||||
_remote_platform = None
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
@@ -653,6 +741,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
if not _remote_host and os.name == "nt":
|
||||
result = _detect_windows()
|
||||
if result:
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
return result
|
||||
# PowerShell probe failed entirely — fall through to the generic path
|
||||
@@ -714,6 +803,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
"gpu_error": _last_gpu_error,
|
||||
}
|
||||
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_remote_host = None
|
||||
_remote_platform = None
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
|
||||
@@ -188,12 +188,18 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
|
||||
# Shrink context if even the chosen KV won't fit alongside weights.
|
||||
# Start from the smaller of the profile's target and the model's limit.
|
||||
cur_ctx = min(ctx, model_ctx_max)
|
||||
while cur_ctx >= 8192:
|
||||
# Floor the context-shrink loop at 8192, but never above the model's own
|
||||
# trained limit. A model with a sub-8192 context (e.g. a 2048-token
|
||||
# SmolLM) starts below 8192, so a hard-coded 8192 guard skipped the loop
|
||||
# entirely and produced NO profile — the serve UI then fell back to
|
||||
# manual flags even though the model fits the GPU trivially.
|
||||
ctx_floor = min(8192, model_ctx_max)
|
||||
while cur_ctx >= ctx_floor:
|
||||
kv = _kv_gb(model, cur_ctx, kv_type)
|
||||
n_cpu_moe, fits = _cpu_moe_for_budget(model, quant, kv, budget, fixed_gb=serve_weights_gb)
|
||||
est = _weights_gb(model, quant, serve_weights_gb) + kv + 0.6
|
||||
# If a non-MoE model can't fit even fully offloaded, try less context.
|
||||
if model.get("is_moe") or fits or cur_ctx <= 8192:
|
||||
if model.get("is_moe") or fits or cur_ctx <= ctx_floor:
|
||||
profiles.append({
|
||||
"key": key,
|
||||
"label": label,
|
||||
|
||||
@@ -66,41 +66,57 @@ def _has_duplicate_title(skills, title: str) -> bool:
|
||||
def _extract_json_object(text: str) -> Optional[dict]:
|
||||
"""Best-effort extraction of a JSON object from an LLM response.
|
||||
|
||||
The response may be wrapped in code fences or surrounded by prose, and some
|
||||
models emit a stray brace in the prose before the real object
|
||||
(e.g. "uses {placeholder} then {...}"). Slicing first-'{' .. last-'}' then
|
||||
grabs an unparseable span and the skill is silently lost. Try the whole
|
||||
string first, then each '{' start position in turn, returning the first
|
||||
candidate that parses to a JSON object (dict). Returns None if none do.
|
||||
The response may be wrapped in code fences or surrounded by prose. Uses
|
||||
json.JSONDecoder().raw_decode() to locate the boundaries of complete JSON
|
||||
objects starting at each '{' position. Nested objects are filtered out to
|
||||
keep only top-level candidates. If multiple non-overlapping valid JSON
|
||||
objects are found, it is treated as ambiguous and returns None. Otherwise,
|
||||
returns the single valid candidate dictionary.
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
s = text.strip()
|
||||
if s.startswith("```"):
|
||||
s = s.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||
end = s.rfind("}")
|
||||
if end == -1:
|
||||
|
||||
decoder = json.JSONDecoder()
|
||||
candidates = []
|
||||
|
||||
start = s.find("{")
|
||||
while start != -1:
|
||||
try:
|
||||
obj, idx = decoder.raw_decode(s[start:])
|
||||
end_pos = start + idx
|
||||
if isinstance(obj, dict):
|
||||
candidates.append((start, end_pos, obj))
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
start = s.find("{", start + 1)
|
||||
|
||||
# Filter out nested candidates to identify top-level dictionaries
|
||||
top_level = []
|
||||
for c in candidates:
|
||||
is_nested = False
|
||||
for other in candidates:
|
||||
if other == c:
|
||||
continue
|
||||
if other[0] <= c[0] and c[1] <= other[1]:
|
||||
is_nested = True
|
||||
break
|
||||
if not is_nested:
|
||||
top_level.append(c)
|
||||
|
||||
if not top_level:
|
||||
return None
|
||||
|
||||
def _as_dict(candidate):
|
||||
try:
|
||||
obj = json.loads(candidate)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return None
|
||||
return obj if isinstance(obj, dict) else None
|
||||
if len(top_level) > 1:
|
||||
logger.debug(
|
||||
"[skill-extract] Found multiple non-overlapping JSON objects: %s",
|
||||
[item[2].get("title") for item in top_level]
|
||||
)
|
||||
return None
|
||||
|
||||
# The clean, common case: the whole (de-fenced) string is the object.
|
||||
obj = _as_dict(s)
|
||||
if obj is not None:
|
||||
return obj
|
||||
# Otherwise scan each '{' candidate up to the last '}'.
|
||||
start = s.find("{")
|
||||
while 0 <= start < end:
|
||||
obj = _as_dict(s[start : end + 1])
|
||||
if obj is not None:
|
||||
return obj
|
||||
start = s.find("{", start + 1)
|
||||
return None
|
||||
return top_level[0][2]
|
||||
|
||||
|
||||
async def maybe_extract_skill(
|
||||
|
||||
@@ -603,7 +603,6 @@ class SkillsManager:
|
||||
escalation) — those are work-in-progress and pollute the
|
||||
prompt with half-finished procedures.
|
||||
"""
|
||||
active_toolsets = active_toolsets or []
|
||||
out = []
|
||||
for s in self.load(owner=owner):
|
||||
status = s.get("status")
|
||||
@@ -617,13 +616,16 @@ class SkillsManager:
|
||||
# Platform gating
|
||||
if platform and s.get("platforms") and platform not in s["platforms"]:
|
||||
continue
|
||||
# requires_toolsets: hide unless every required toolset is active
|
||||
# requires_toolsets: hide unless every required toolset is active.
|
||||
# active_toolsets=None means the caller doesn't know the active
|
||||
# set (API listings, chat preface) — don't gate in that case;
|
||||
# only an explicit list filters.
|
||||
req = s.get("requires_toolsets") or []
|
||||
if req and not all(t in active_toolsets for t in req):
|
||||
if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
|
||||
continue
|
||||
# fallback_for_toolsets: hide when any of those toolsets is active
|
||||
fb = s.get("fallback_for_toolsets") or []
|
||||
if fb and any(t in active_toolsets for t in fb):
|
||||
if fb and active_toolsets and any(t in active_toolsets for t in fb):
|
||||
continue
|
||||
out.append({
|
||||
"name": s["name"],
|
||||
|
||||
@@ -285,6 +285,7 @@ class ResearchHandler:
|
||||
query, report, stats, elapsed,
|
||||
findings=researcher.findings,
|
||||
evolving_report=researcher.evolving_report,
|
||||
analyzed_urls=getattr(researcher, "analyzed_urls", None),
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
@@ -331,7 +332,8 @@ class ResearchHandler:
|
||||
|
||||
def _format_research_report(
|
||||
self, query: str, full_report: str, stats: dict, elapsed: float,
|
||||
findings: list = None, evolving_report: str = None,
|
||||
findings: Optional[list] = None, evolving_report: Optional[str] = None,
|
||||
analyzed_urls: Optional[list] = None,
|
||||
) -> str:
|
||||
"""Format research report with sources list and expandable raw findings."""
|
||||
summary_lines = [
|
||||
@@ -342,20 +344,34 @@ class ResearchHandler:
|
||||
]
|
||||
summary_text = " | ".join(summary_lines)
|
||||
|
||||
# Build sources list with clickable links
|
||||
# Build sources list with clickable links. Keep the curated Sources
|
||||
# section filtered for citation quality, but also list every unique URL
|
||||
# the research run inspected so the "URLs Analyzed" count is auditable.
|
||||
sources_section = ""
|
||||
if findings:
|
||||
analyzed_urls_section = ""
|
||||
url_items = analyzed_urls if analyzed_urls is not None else findings
|
||||
if findings or url_items:
|
||||
seen_urls = set()
|
||||
source_lines = []
|
||||
for f in findings:
|
||||
analyzed_seen = set()
|
||||
analyzed_lines = []
|
||||
for f in findings or []:
|
||||
url = f.get("url", "")
|
||||
title = f.get("title", "") or url
|
||||
summary = f.get("summary", "") or f.get("evidence", "")
|
||||
if url and url not in seen_urls and not is_low_quality(summary):
|
||||
seen_urls.add(url)
|
||||
source_lines.append(f"- [{title}]({url})")
|
||||
for item in url_items or []:
|
||||
url = item.get("url", "")
|
||||
title = item.get("title", "") or url
|
||||
if url and url not in analyzed_seen:
|
||||
analyzed_seen.add(url)
|
||||
analyzed_lines.append(f"{len(analyzed_lines) + 1}. [{title}]({url})")
|
||||
if source_lines:
|
||||
sources_section = "\n### Sources\n\n" + "\n".join(source_lines) + "\n"
|
||||
if analyzed_lines:
|
||||
analyzed_urls_section = "\n### Analyzed URLs\n\n" + "\n".join(analyzed_lines) + "\n"
|
||||
|
||||
# Build raw findings section (individual extractions per source)
|
||||
raw_findings_section = ""
|
||||
@@ -391,6 +407,7 @@ class ResearchHandler:
|
||||
{full_report}
|
||||
|
||||
{sources_section}
|
||||
{analyzed_urls_section}
|
||||
{collected_section}
|
||||
---
|
||||
|
||||
|
||||
@@ -299,6 +299,40 @@ def fetch_webpage_content(url: str, timeout: int = 5, retry_attempt: int = 0) ->
|
||||
_cache_result(cache_file, cache_key, result, url)
|
||||
return result
|
||||
|
||||
# Plain-text / Markdown / JSON handling. Sources like
|
||||
# raw.githubusercontent.com serve Markdown as `text/plain`, JSON APIs and
|
||||
# raw config files serve `application/json`, and a lot of code and tool
|
||||
# docs live in `.md` / `.txt`. These have no HTML structure, so the HTML
|
||||
# branch below would extract nothing and report "no readable text content".
|
||||
# Return the body verbatim instead. The `is_html` guard keeps real HTML
|
||||
# (including `application/xhtml+xml`) on the parsing path; the `json` check
|
||||
# covers `application/json` and `+json` suffixes; the URL-suffix fallback
|
||||
# catches servers that mislabel text files as `application/octet-stream`.
|
||||
is_html = "html" in content_type
|
||||
is_json = "json" in content_type
|
||||
url_path = url.lower().split("?", 1)[0].split("#", 1)[0]
|
||||
looks_like_text_file = url_path.endswith(
|
||||
(".md", ".markdown", ".txt", ".text", ".json", ".jsonl")
|
||||
)
|
||||
if not is_html and (content_type.startswith("text/") or is_json or looks_like_text_file):
|
||||
text_body = (response.text or "").strip()
|
||||
result = {
|
||||
"url": url,
|
||||
"title": os.path.basename(url_path) or url,
|
||||
"content": text_body,
|
||||
"lists": [],
|
||||
"tables": [],
|
||||
"code_blocks": [],
|
||||
"meta_description": "",
|
||||
"meta_keywords": "",
|
||||
"js_rendered": False,
|
||||
"js_message": "",
|
||||
"success": bool(text_body),
|
||||
"error": "" if text_body else "Empty response body",
|
||||
}
|
||||
_cache_result(cache_file, cache_key, result, url)
|
||||
return result
|
||||
|
||||
# HTML handling
|
||||
try:
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
@@ -134,9 +134,10 @@ _NEWS_HINTS = ("news", "nyheter", "headlines", "breaking", "latest", "today", "i
|
||||
_GENERAL_ENGINES = os.environ.get("SEARXNG_GENERAL_ENGINES", "bing,mojeek,presearch")
|
||||
|
||||
|
||||
def searxng_search_api(query: str, count: int = 10, categories: str = "general",
|
||||
def searxng_search_api(query: str, count: Optional[int] = None, categories: str = "general",
|
||||
time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using SearXNG JSON API. Returns list of {title, url, snippet}."""
|
||||
count = count if count is not None else _get_result_count()
|
||||
instance = _get_search_instance()
|
||||
api_key = ""
|
||||
headers = {"User-Agent": "Mozilla/5.0"}
|
||||
@@ -282,8 +283,9 @@ def searxng_search(query, max_results=10):
|
||||
|
||||
# ── Brave ──
|
||||
|
||||
def brave_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
|
||||
def brave_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using Brave API with key from admin settings or env var."""
|
||||
count = count if count is not None else _get_result_count()
|
||||
api_key = _get_provider_key("brave") or os.environ.get("DATA_BRAVE_API_KEY") or ""
|
||||
return _brave_search_impl(query, count, time_filter, search_config={"brave_api_key": api_key})
|
||||
|
||||
@@ -381,9 +383,9 @@ def _resolve_ddg_redirect(raw: str) -> str:
|
||||
return resolved
|
||||
|
||||
|
||||
def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
|
||||
def duckduckgo_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using DuckDuckGo via the duckduckgo-search library. No API key needed."""
|
||||
|
||||
count = count if count is not None else _get_result_count()
|
||||
def _html_fallback() -> List[dict]:
|
||||
try:
|
||||
response = httpx.get(
|
||||
@@ -415,7 +417,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] =
|
||||
return []
|
||||
|
||||
try:
|
||||
from duckduckgo_search import DDGS
|
||||
from ddgs import DDGS
|
||||
except ImportError:
|
||||
logger.warning("duckduckgo-search package not installed; using HTML fallback")
|
||||
return _html_fallback()
|
||||
@@ -452,7 +454,7 @@ def duckduckgo_search(query: str, count: int = 10, time_filter: Optional[str] =
|
||||
|
||||
# ── Google Programmable Search Engine ──
|
||||
|
||||
def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
|
||||
def google_pse_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using Google PSE (Custom Search JSON API).
|
||||
|
||||
Requires two keys in settings:
|
||||
@@ -460,6 +462,7 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
|
||||
- google_pse_cx: Programmable Search Engine ID (cx)
|
||||
Or env vars GOOGLE_API_KEY and GOOGLE_PSE_CX.
|
||||
"""
|
||||
count = count if count is not None else _get_result_count()
|
||||
settings = _get_search_settings()
|
||||
api_key = _get_provider_key("google_pse") or os.environ.get("GOOGLE_API_KEY", "")
|
||||
cx = (settings.get("google_pse_cx") or "").strip() or os.environ.get("GOOGLE_PSE_CX", "")
|
||||
@@ -522,8 +525,9 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
|
||||
|
||||
# ── Tavily ──
|
||||
|
||||
def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
|
||||
def tavily_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using Tavily API. Requires search_api_key or TAVILY_API_KEY env var."""
|
||||
count = count if count is not None else _get_result_count()
|
||||
api_key = _get_provider_key("tavily") or os.environ.get("TAVILY_API_KEY", "")
|
||||
if not api_key:
|
||||
logger.warning("Tavily: no API key configured")
|
||||
@@ -580,8 +584,9 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
|
||||
|
||||
# ── Serper.dev ──
|
||||
|
||||
def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None) -> List[dict]:
|
||||
def serper_search(query: str, count: Optional[int] = None, time_filter: Optional[str] = None) -> List[dict]:
|
||||
"""Search using Serper.dev API. Requires search_api_key or SERPER_API_KEY env var."""
|
||||
count = count if count is not None else _get_result_count()
|
||||
api_key = _get_provider_key("serper") or os.environ.get("SERPER_API_KEY", "")
|
||||
if not api_key:
|
||||
logger.warning("Serper: no API key configured")
|
||||
|
||||
@@ -64,20 +64,40 @@ def is_youtube_url(url: str) -> bool:
|
||||
return "youtube.com" in url or "youtu.be" in url
|
||||
|
||||
|
||||
# youtube.com-shaped hosts. music.youtube.com serves the same /watch and
|
||||
# /shorts paths, so links shared from YouTube Music must resolve too.
|
||||
_YT_HOSTS = ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com")
|
||||
# Path prefixes whose first following segment is the video id. Covers the
|
||||
# /embed/ player, Shorts (/shorts/), live streams (/live/), and the legacy
|
||||
# /v/ embed — all of which `is_youtube_url` already treats as YouTube, so
|
||||
# they must be extractable or the link is silently dropped (neither web-fetched
|
||||
# nor transcript-fetched) by the chat pipeline.
|
||||
_YT_PATH_PREFIXES = ("/embed/", "/shorts/", "/live/", "/v/")
|
||||
|
||||
|
||||
def extract_youtube_id(url: str) -> Optional[str]:
|
||||
"""Extract YouTube video ID from various URL formats."""
|
||||
"""Extract a YouTube video ID from the common URL shapes:
|
||||
watch?v=, youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>,
|
||||
across youtube.com / m.youtube.com / music.youtube.com / youtu.be."""
|
||||
if not isinstance(url, str):
|
||||
return None
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
if parsed.hostname in ("www.youtube.com", "youtube.com", "m.youtube.com"):
|
||||
host = (parsed.hostname or "").lower()
|
||||
if host in _YT_HOSTS:
|
||||
if parsed.path == "/watch":
|
||||
params = urllib.parse.parse_qs(parsed.query)
|
||||
if "v" in params:
|
||||
if params.get("v"):
|
||||
return params["v"][0]
|
||||
elif parsed.path.startswith("/embed/"):
|
||||
return parsed.path.split("/")[-1]
|
||||
elif parsed.hostname == "youtu.be":
|
||||
return parsed.path[1:]
|
||||
else:
|
||||
for prefix in _YT_PATH_PREFIXES:
|
||||
if parsed.path.startswith(prefix):
|
||||
vid = parsed.path[len(prefix):].split("/")[0]
|
||||
if vid:
|
||||
return vid
|
||||
elif host == "youtu.be":
|
||||
vid = parsed.path.lstrip("/").split("/")[0]
|
||||
if vid:
|
||||
return vid
|
||||
return None
|
||||
|
||||
|
||||
@@ -170,6 +190,8 @@ def format_transcript_for_context(
|
||||
if segments:
|
||||
ctx += "Timestamped Transcript:\n"
|
||||
for seg in segments:
|
||||
if not isinstance(seg, dict):
|
||||
continue
|
||||
ctx += f"[{seg['timestamp']}] {seg['text']}\n"
|
||||
# Check length — fall back to plain text if too long
|
||||
if len(ctx) > 12000:
|
||||
@@ -202,15 +224,24 @@ async def fetch_youtube_comments(
|
||||
f"https://www.youtube.com/watch?v={video_id}",
|
||||
]
|
||||
|
||||
proc = await asyncio.wait_for(
|
||||
asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
),
|
||||
timeout=timeout,
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
# Bound the wait on the process actually finishing, not on spawning it.
|
||||
# create_subprocess_exec returns as soon as the child starts, so wrapping
|
||||
# it in wait_for never enforces the timeout — proc.communicate() is the
|
||||
# blocking step. Kill and reap the child if it overruns so it does not
|
||||
# linger after we return.
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
raise
|
||||
|
||||
if proc.returncode != 0:
|
||||
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
|
||||
|
||||
@@ -91,6 +91,9 @@ _ROUTING_PATTERNS: tuple[tuple[str, str, Pattern[str]], ...] = tuple(
|
||||
("ui", "tool or feature toggle request", r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b"),
|
||||
|
||||
# Deep research jobs, not quick conceptual mentions of research.
|
||||
("web", "explicit web search request", rf"{_PLEASE}(?:do|run|use|perform|make)\s+(?:a\s+)?(?:web\s+search|search\s+the\s+web)\b.+"),
|
||||
("web", "web lookup imperative request", rf"{_PLEASE}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
|
||||
("web", "assistant web lookup request", rf"{_ACTION_QUESTION}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
|
||||
("research", "deep research imperative request", rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+"),
|
||||
("research", "assistant deep research request", rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+"),
|
||||
|
||||
|
||||
+179
-53
@@ -21,7 +21,7 @@ from src.settings import get_setting
|
||||
from src.prompt_security import untrusted_context_message
|
||||
from src.tool_security import blocked_tools_for_owner, plan_mode_disabled_tools
|
||||
from src.tool_policy import GUIDE_ONLY_DIRECTIVE, ToolPolicy
|
||||
from src.tool_utils import get_mcp_manager
|
||||
from src.tool_utils import _truncate, get_mcp_manager
|
||||
from src.agent_tools import (
|
||||
parse_tool_blocks,
|
||||
strip_tool_blocks,
|
||||
@@ -262,6 +262,11 @@ _DOMAIN_RULES = {
|
||||
- Use `manage_settings` for preferences and tool enable/disable.
|
||||
- Use named tools over `app_api` when a named wrapper exists.
|
||||
- `app_api` is only for safe UI/API actions without a named tool; do not use it for shell, package installs, engine rebuilds, or sensitive auth/admin paths.""",
|
||||
"contacts": """\
|
||||
## Contacts rules
|
||||
- Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
|
||||
- Use `manage_contact` to list, add, update, or delete contacts in the address book.
|
||||
- Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
|
||||
}
|
||||
|
||||
_DOMAIN_TOOL_MAP = {
|
||||
@@ -272,8 +277,9 @@ _DOMAIN_TOOL_MAP = {
|
||||
"notes_calendar_tasks": {"manage_notes", "manage_calendar", "manage_tasks"},
|
||||
"ui": {"ui_control"},
|
||||
"sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
|
||||
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls"},
|
||||
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
|
||||
"settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
|
||||
"contacts": {"resolve_contact", "manage_contact"},
|
||||
}
|
||||
|
||||
def _domain_rules_for_tools(tool_names: set) -> list[str]:
|
||||
@@ -309,6 +315,7 @@ NEVER pipe multi-line Python through `python -c "..."` — shell quoting eats re
|
||||
<python code>
|
||||
```
|
||||
Execute Python code. Use for computation, data processing, scripting. NOT for writing code for the user (use create_document for that). Same sandbox limits as bash — no TTY, no GUI, no `input()`; for anything the user should interact with, generate a single HTML file with inline JS instead.
|
||||
Prefer a dedicated tool whenever one fits the job (reading, searching, or writing files); use python only for computation/processing no dedicated tool covers - not for reading or writing files.
|
||||
Do NOT use Python/requests for web lookup/search/latest/current requests when `web_search` or `web_fetch` is available.""",
|
||||
|
||||
"web_search": """\
|
||||
@@ -347,6 +354,11 @@ Write content to a file. First line is the path, rest is the content.""",
|
||||
```
|
||||
Edit an EXISTING file by exact string replacement. PREFER this over bash (sed/echo/redirects) for changing files — it shows a before/after diff. `old_string` must match the file exactly and be unique unless `replace_all` is true. Use write_file to create a new file.""",
|
||||
|
||||
"get_workspace": """\
|
||||
```get_workspace
|
||||
```
|
||||
Return the absolute path of the active workspace folder. File tools are CONFINED to it (paths can be RELATIVE to it); the shell starts there (cwd) but is NOT sandboxed. Call this first when the user says "the project"/"the code"/"this folder" without a path, instead of asking them. No arguments.""",
|
||||
|
||||
"create_document": """\
|
||||
```create_document
|
||||
<title>
|
||||
@@ -598,7 +610,7 @@ _API_HOSTS = frozenset([
|
||||
"api.deepseek.com", "deepseek.com",
|
||||
"api.together.xyz", "api.fireworks.ai",
|
||||
"api.perplexity.ai", "api.x.ai",
|
||||
"ollama.com", "api.venice.ai",
|
||||
"ollama.com", "api.venice.ai", "api.kimi.com",
|
||||
"api.githubcopilot.com",
|
||||
# Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
|
||||
# Without these, `_is_api_model` falls back to keyword sniffing on the
|
||||
@@ -785,6 +797,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
|
||||
domains.add("documents")
|
||||
if has(r"\b(search|web|google|look up|latest|news|current|weather|forecast|stock price|price of|website|url|https?://|www\.)\b"):
|
||||
domains.add("web")
|
||||
if has(
|
||||
r"\b(wyszukaj|wyszukać|wyszukac)\b.*\b(internet|internecie|online|web)\b",
|
||||
r"\b(sprawd[zź]|znajd[zź])\b.*\b(internet|internecie|online|web)\b",
|
||||
r"\b(aktualn\w*|bieżąc\w*|biezac\w*|dzisiaj|teraz)\b.*\b(pogod\w*|temperatur\w*)\b",
|
||||
):
|
||||
domains.add("web")
|
||||
if has(r"\b(research|deep dive|investigate|look into)\b"):
|
||||
domains.add("web")
|
||||
if has(r"\b(open|show|toggle|turn on|turn off|disable|enable|switch model|change model|settings|theme|panel)\b"):
|
||||
@@ -795,6 +813,8 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
|
||||
domains.add("files")
|
||||
if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
|
||||
domains.add("settings")
|
||||
if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
|
||||
domains.add("contacts")
|
||||
|
||||
low_signal = not continuation and not domains
|
||||
return {
|
||||
@@ -860,7 +880,7 @@ def _build_system_prompt(
|
||||
_ov_sig = _hl.sha256(_json.dumps(get_builtin_overrides() or {}, sort_keys=True).encode()).hexdigest()
|
||||
except Exception:
|
||||
_ov_sig = ""
|
||||
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, suppress_local_context)
|
||||
cache_key = (frozenset(disabled_tools or []), bool(mcp_mgr), needs_admin, _rt_key, compact, _ov_sig, owner, suppress_local_context)
|
||||
if _cached_base_prompt and _cached_base_prompt_key == cache_key and not active_document:
|
||||
agent_prompt = _cached_base_prompt
|
||||
# Skill index is user-editable (name + description), so it must never
|
||||
@@ -868,7 +888,7 @@ def _build_system_prompt(
|
||||
# when the cache hits.
|
||||
_, _skill_index_block = _build_base_prompt(
|
||||
disabled_tools, mcp_mgr, needs_admin, relevant_tools,
|
||||
mcp_disabled_map=mcp_disabled_map, compact=compact,
|
||||
mcp_disabled_map=mcp_disabled_map, compact=compact, owner=owner,
|
||||
suppress_local_context=suppress_local_context,
|
||||
)
|
||||
else:
|
||||
@@ -879,6 +899,7 @@ def _build_system_prompt(
|
||||
relevant_tools,
|
||||
mcp_disabled_map=mcp_disabled_map,
|
||||
compact=compact,
|
||||
owner=owner,
|
||||
suppress_local_context=suppress_local_context,
|
||||
)
|
||||
if not active_document:
|
||||
@@ -894,9 +915,20 @@ def _build_system_prompt(
|
||||
|
||||
# Current date/time for every agent request. This is user-local when the
|
||||
# browser provided timezone headers, with a server-local fallback.
|
||||
#
|
||||
# IMPORTANT: this is intentionally NOT prepended into agent_prompt (the
|
||||
# system message) anymore. Its text changes every minute, and local
|
||||
# OpenAI-compatible backends (llama.cpp / LM Studio) key their KV-cache
|
||||
# prefix off the system message byte-for-byte — mixing ever-changing
|
||||
# timestamp text into the (already large, tool-laden) agent system prompt
|
||||
# would invalidate the cached prefix on every single request, forcing a
|
||||
# full prompt re-evaluation each turn (issue #2927). It's built here as a
|
||||
# standalone *user*-role message and inserted near the end of the array,
|
||||
# right alongside _doc_message / _skills_message, below.
|
||||
_datetime_message = None
|
||||
try:
|
||||
from src.user_time import current_datetime_prompt
|
||||
agent_prompt = current_datetime_prompt() + agent_prompt
|
||||
from src.user_time import current_datetime_context_message
|
||||
_datetime_message = current_datetime_context_message()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -1296,6 +1328,9 @@ def _build_system_prompt(
|
||||
last_user_idx += 1
|
||||
if _skills_message:
|
||||
merged.insert(last_user_idx, _skills_message)
|
||||
last_user_idx += 1
|
||||
if _datetime_message:
|
||||
merged.insert(last_user_idx, _datetime_message)
|
||||
|
||||
return merged, mcp_schemas
|
||||
|
||||
@@ -1314,6 +1349,7 @@ def _build_base_prompt(
|
||||
relevant_tools=None,
|
||||
mcp_disabled_map=None,
|
||||
compact: bool = False,
|
||||
owner: Optional[str] = None,
|
||||
suppress_local_context: bool = False,
|
||||
):
|
||||
"""Build the agent prompt with only relevant tools included.
|
||||
@@ -1373,7 +1409,7 @@ def _build_base_prompt(
|
||||
from src.constants import DATA_DIR
|
||||
_sm = SkillsManager(DATA_DIR)
|
||||
active_tools = list(set(TOOL_SECTIONS.keys()) - set(disabled or []))
|
||||
skill_idx = _sm.index_for(owner=None, active_toolsets=active_tools)
|
||||
skill_idx = _sm.index_for(owner=owner, active_toolsets=active_tools)
|
||||
if skill_idx:
|
||||
lines = ["## Available skills",
|
||||
"Procedures the assistant should consult before doing domain work. "
|
||||
@@ -1782,10 +1818,10 @@ async def stream_agent_loop(
|
||||
owner: Optional[str] = None,
|
||||
relevant_tools: Optional[Set[str]] = None,
|
||||
fallbacks: Optional[List[tuple]] = None,
|
||||
workspace: Optional[str] = None,
|
||||
plan_mode: bool = False,
|
||||
approved_plan: Optional[str] = None,
|
||||
tool_policy: Optional[ToolPolicy] = None,
|
||||
workspace: Optional[str] = None,
|
||||
_is_teacher_run: bool = False,
|
||||
) -> AsyncGenerator[str, None]:
|
||||
"""Streaming agent loop generator.
|
||||
@@ -1854,8 +1890,21 @@ async def stream_agent_loop(
|
||||
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
|
||||
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
|
||||
from src.tool_index import ALWAYS_AVAILABLE
|
||||
_relevant_tools = set(ALWAYS_AVAILABLE)
|
||||
logger.info("[tool-rag] Low-signal agent message; skipping retrieval and using always-available tools only")
|
||||
if workspace:
|
||||
# An active workspace IS the file-work signal: a vague "look at the
|
||||
# project" means explore this folder. Surface only the READ-ONLY file
|
||||
# tools (intersection with the plan-mode read-only allowlist) so the
|
||||
# agent can investigate; write/shell tools stay out until the request
|
||||
# actually calls for them (RAG retrieval adds those on a real ask).
|
||||
_relevant_tools = set(ALWAYS_AVAILABLE)
|
||||
from src.tool_security import PLAN_MODE_READONLY_TOOLS
|
||||
_relevant_tools |= (_DOMAIN_TOOL_MAP["files"] & PLAN_MODE_READONLY_TOOLS)
|
||||
logger.info("[tool-rag] Low-signal but workspace active; including read-only file tools")
|
||||
else:
|
||||
# Don't short-circuit: fall through to RAG retrieval below.
|
||||
# Non-English queries are flagged low_signal by the English-only
|
||||
# intent classifier, but fastembed retrieval works across languages.
|
||||
logger.info("[tool-rag] Low-signal query; will run RAG retrieval")
|
||||
if not guide_only and not _relevant_tools:
|
||||
try:
|
||||
from src.tool_index import get_tool_index, ALWAYS_AVAILABLE
|
||||
@@ -1930,6 +1979,44 @@ async def stream_agent_loop(
|
||||
if _relevant_tools is not None and active_document is not None:
|
||||
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
|
||||
|
||||
# The skill index injected by _build_system_prompt tells the model to
|
||||
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
|
||||
# into the prompt as procedures to follow — but neither path goes through
|
||||
# tool selection, so the model can be handed a procedure naming tools
|
||||
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
|
||||
# in lockstep: manage_skills is callable whenever any skill is indexed,
|
||||
# and a matched skill's declared requires_toolsets ride along with it.
|
||||
if not guide_only and _relevant_tools is not None:
|
||||
try:
|
||||
from services.memory.skills import SkillsManager
|
||||
from src.constants import DATA_DIR
|
||||
_skills_on = True
|
||||
try:
|
||||
from routes.prefs_routes import _load_for_user as _load_prefs
|
||||
_skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
|
||||
except Exception:
|
||||
pass
|
||||
_sm = SkillsManager(DATA_DIR)
|
||||
_owner_skills = _sm.load(owner=owner) if _skills_on else []
|
||||
if _owner_skills:
|
||||
_relevant_tools.add("manage_skills")
|
||||
if _retrieval_query:
|
||||
# Validate against every known executable tool, not just
|
||||
# TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
|
||||
# schemas without a prompt-prose section.
|
||||
from src.tool_policy import known_tool_names
|
||||
_known = known_tool_names()
|
||||
for _sk in _sm.get_relevant_skills(
|
||||
_retrieval_query, skills=_owner_skills,
|
||||
threshold=0.25, max_items=3,
|
||||
):
|
||||
_relevant_tools.update(
|
||||
t for t in (_sk.get("requires_toolsets") or [])
|
||||
if t in _known
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
|
||||
|
||||
if _relevant_tools is not None:
|
||||
logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
|
||||
|
||||
@@ -1980,6 +2067,10 @@ async def stream_agent_loop(
|
||||
# and can override this list for users who know their setup.
|
||||
_model_no_tools = any(kw in _model_lc for kw in (
|
||||
"deepseek-r1",
|
||||
# Open-weight GPT-OSS models are commonly served through llama.cpp /
|
||||
# llama-cpp-python. Their names contain "gpt-o", but they do not use
|
||||
# OpenAI's native tool-call channel unless the endpoint opts in.
|
||||
"gpt-oss",
|
||||
))
|
||||
# Native Ollama endpoints (/api/chat) handle tool schemas differently from
|
||||
# the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to
|
||||
@@ -2011,27 +2102,6 @@ async def stream_agent_loop(
|
||||
suppress_local_context=guide_only,
|
||||
active_email=active_email,
|
||||
)
|
||||
if workspace and not guide_only:
|
||||
# PREPEND (not append) so it dominates the large base prompt — appended
|
||||
# at the end, small models ignored it and asked the user for code. The
|
||||
# folder IS the project; the agent must explore it, not ask.
|
||||
_ws_note = (
|
||||
f"## ACTIVE WORKSPACE — READ FIRST\n"
|
||||
f"The user is working in this folder: {workspace}\n"
|
||||
f"It IS the project. bash/python run with cwd set here and "
|
||||
f"read_file/write_file are confined to it (paths outside are rejected).\n"
|
||||
f"When the user says \"the code\" / \"this project\" / \"the workspace\" "
|
||||
f"or asks to review/find/edit something WITHOUT a path, they mean THIS "
|
||||
f"folder. Do NOT ask the user for code or a path, and do NOT read a file "
|
||||
f"literally named \"workspace\". ALWAYS start by exploring it yourself: "
|
||||
f"run `bash` → `git ls-files` (or `ls -R`) to see the files, then "
|
||||
f"read_file the relevant ones by path RELATIVE to the workspace."
|
||||
)
|
||||
if messages and messages[0].get("role") == "system":
|
||||
messages[0]["content"] = _ws_note + "\n\n" + (messages[0].get("content") or "")
|
||||
else:
|
||||
messages.insert(0, {"role": "system", "content": _ws_note})
|
||||
logger.info("[workspace] active for this turn: %s", workspace)
|
||||
if plan_mode and not guide_only:
|
||||
# Steer the model to investigate-then-propose. Hard tool gating handles
|
||||
# every write path except shell; this directive is what keeps the
|
||||
@@ -2063,30 +2133,34 @@ async def stream_agent_loop(
|
||||
_t3 = time.time()
|
||||
try:
|
||||
from src.context_compactor import trim_for_context
|
||||
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX
|
||||
from src.settings import is_setting_overridden
|
||||
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX, DEFAULT_BUDGET, budget_is_explicit as _budget_is_explicit
|
||||
from src.model_context import budget_context_for_model
|
||||
|
||||
soft_budget = int(get_setting("agent_input_token_budget", 6000) or 0)
|
||||
soft_budget = int(get_setting("agent_input_token_budget", DEFAULT_BUDGET) or 0)
|
||||
if soft_budget > 0:
|
||||
before_trim_tokens = estimate_tokens(messages)
|
||||
reserve_tokens = min(max(max_tokens or 1024, 512), 2048)
|
||||
# Honour the configurable ceiling for the auto-derived budget path.
|
||||
# No-op when the user has an explicit `agent_input_token_budget`
|
||||
# (that branch ignores hard_max). Falls back to DEFAULT_HARD_MAX
|
||||
# on missing/malformed values so misconfig can't zero the budget.
|
||||
# Ceiling for the auto-derived budget (no effect on an explicit budget;
|
||||
# see #1230). Falls back to DEFAULT_HARD_MAX on missing/malformed values
|
||||
# so misconfig can't zero the budget.
|
||||
try:
|
||||
hard_max = int(get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX) or DEFAULT_HARD_MAX)
|
||||
except (TypeError, ValueError):
|
||||
hard_max = DEFAULT_HARD_MAX
|
||||
if hard_max <= 0:
|
||||
hard_max = DEFAULT_HARD_MAX
|
||||
# Scale the default budget to the model's context window so long-context
|
||||
# models aren't silently capped at 6000; an explicit user setting is
|
||||
# still honoured (clamped to the window). (#1170)
|
||||
# Default value = auto sentinel (scale to the window); any other value =
|
||||
# explicit cap. Value-based, not presence-based, because the save path
|
||||
# materializes defaults so a persisted default must still read as auto (#4121).
|
||||
budget_is_explicit = _budget_is_explicit(soft_budget)
|
||||
# Scale only off a window we actually discovered, bound to the value it
|
||||
# proves (else 0) — not the passed-in context_length, which can be stale
|
||||
# or unset for some callers (#4122 review).
|
||||
ctx_for_budget = budget_context_for_model(endpoint_url, model, fallback=context_length)
|
||||
effective_budget = compute_input_token_budget(
|
||||
soft_budget,
|
||||
context_length,
|
||||
is_setting_overridden("agent_input_token_budget"),
|
||||
ctx_for_budget,
|
||||
budget_is_explicit,
|
||||
hard_max=hard_max,
|
||||
)
|
||||
trimmed_messages = trim_for_context(
|
||||
@@ -2161,11 +2235,12 @@ async def stream_agent_loop(
|
||||
# tool, so we don't nudge on harmless transitional text like "let me
|
||||
# know what you think".
|
||||
_INTENT_RE = re.compile(
|
||||
r"(?:^|\n)\s*(?:let me|i'?ll|i will|going to|let's)\s+"
|
||||
r"(?:^|\n)\s*(?:let me|i'?ll|i will|i need to|we need to|need to|"
|
||||
r"i should|we should|i must|we must|going to|let's)\s+"
|
||||
r"(?:tail|check|investigate|look at|see|tail|read|fetch|inspect|"
|
||||
r"verify|diagnose|examine|debug|capture|grab|pull|view|run|call|"
|
||||
r"trigger|launch|start|kick off|stop|kill|restart|adopt|serve|"
|
||||
r"register|adopt|list|search|find|query|hit|ping|test)"
|
||||
r"register|adopt|list|search|find|query|hit|ping|test|use|perform|do)"
|
||||
r"\b[^.\n]{0,140}",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
@@ -2206,9 +2281,17 @@ async def stream_agent_loop(
|
||||
elif _is_api_model:
|
||||
# Filter schemas by RAG-selected tools (if available)
|
||||
if _relevant_tools:
|
||||
# _build_base_prompt unions _ADMIN_TOOLS into the prompt
|
||||
# sections when admin intent fires — the schema list must
|
||||
# offer the same names, or the model reads prose describing
|
||||
# tools it cannot call and substitutes the nearest schema
|
||||
# it does have (e.g. manage_memory for manage_skills).
|
||||
_schema_names = set(_relevant_tools)
|
||||
if _needs_admin:
|
||||
_schema_names |= _ADMIN_TOOLS
|
||||
base_schemas = [
|
||||
s for s in FUNCTION_TOOL_SCHEMAS
|
||||
if s.get("function", {}).get("name") in _relevant_tools
|
||||
if s.get("function", {}).get("name") in _schema_names
|
||||
]
|
||||
_mcp_filtered = [
|
||||
s for s in mcp_schemas
|
||||
@@ -2254,6 +2337,7 @@ async def stream_agent_loop(
|
||||
prompt_type=prompt_type if round_num == 1 else None,
|
||||
tools=all_tool_schemas if all_tool_schemas else None,
|
||||
timeout=agent_stream_timeout,
|
||||
session_id=session_id,
|
||||
):
|
||||
if time.time() > _round_deadline:
|
||||
logger.warning(f"[agent] round {round_num} stream exceeded wall-clock deadline; cutting off")
|
||||
@@ -2743,6 +2827,46 @@ async def stream_agent_loop(
|
||||
)
|
||||
desc, result = await _tool_task
|
||||
|
||||
# A skill the model just loaded can prescribe tools that weren't
|
||||
# RAG-selected this turn (declared via requires_toolsets in its
|
||||
# frontmatter). Union them into the selection so the NEXT round's
|
||||
# schema list includes them — otherwise the model reads "use
|
||||
# grep" from the skill it fetched but has no grep schema to call.
|
||||
if (
|
||||
block.tool_type == "manage_skills"
|
||||
and _relevant_tools is not None
|
||||
and not result.get("error")
|
||||
):
|
||||
_ms_args = {}
|
||||
_ms_raw = (block.content or "").strip()
|
||||
if _ms_raw.startswith("{"):
|
||||
try:
|
||||
_ms_args = json.loads(_ms_raw)
|
||||
except json.JSONDecodeError:
|
||||
_ms_args = {}
|
||||
_ms_name = str(_ms_args.get("name", "") or "").strip()
|
||||
if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
|
||||
try:
|
||||
from services.memory.skills import SkillsManager as _SkM
|
||||
from src.constants import DATA_DIR as _DD
|
||||
from src.tool_policy import known_tool_names as _ktn
|
||||
_known = _ktn()
|
||||
for _sk in _SkM(_DD).load(owner=owner):
|
||||
if _sk.get("name") == _ms_name:
|
||||
_new = {
|
||||
t for t in (_sk.get("requires_toolsets") or [])
|
||||
if t in _known and t not in _relevant_tools
|
||||
}
|
||||
if _new:
|
||||
_relevant_tools.update(_new)
|
||||
logger.info(
|
||||
"[tool-rag] skill '%s' unlocked tools for next round: %s",
|
||||
_ms_name, sorted(_new),
|
||||
)
|
||||
break
|
||||
except Exception as _e:
|
||||
logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
|
||||
|
||||
# Extract structured web sources from web_search tool output.
|
||||
# web_search returns {"output": ..., "exit_code": 0}; check "output"
|
||||
# first so the <!-- SOURCES:…--> marker is found and stripped even
|
||||
@@ -2833,18 +2957,20 @@ async def stream_agent_loop(
|
||||
# On a bash/python timeout the result carries error + (often
|
||||
# empty) stdout/stderr; fall back to the error so the "timed
|
||||
# out" reason reaches the UI instead of a blank result.
|
||||
output_text = (result["stdout"] or result["stderr"] or result.get("error", ""))[:2000]
|
||||
raw = result["stdout"] or result["stderr"] or result.get("error", "")
|
||||
output_text = _truncate(raw)
|
||||
elif "output" in result:
|
||||
# bash / python canonical result: {"output": ..., "exit_code": ...}
|
||||
output_text = (result["output"] or "")[:2000]
|
||||
raw = result["output"] or ""
|
||||
output_text = _truncate(raw)
|
||||
elif "response" in result:
|
||||
# AI interaction tools (chat_with_model, send_to_session)
|
||||
label = result.get("model", result.get("session_name", "AI"))
|
||||
output_text = f"{label}: {result['response']}"[:4000]
|
||||
output_text = _truncate(f"{label}: {result['response']}")
|
||||
elif "content" in result:
|
||||
output_text = result["content"][:2000]
|
||||
output_text = _truncate(result["content"])
|
||||
elif "results" in result:
|
||||
output_text = result["results"][:4000]
|
||||
output_text = _truncate(result["results"])
|
||||
elif "session_id" in result and "name" in result:
|
||||
output_text = f"Session created: {result['name']} (id: {result['session_id']})"
|
||||
elif "success" in result:
|
||||
@@ -2854,7 +2980,7 @@ async def stream_agent_loop(
|
||||
else f"Error: {result.get('error', '')}"
|
||||
)
|
||||
elif "error" in result:
|
||||
output_text = result["error"][:2000]
|
||||
output_text = _truncate(result["error"])
|
||||
|
||||
# Emit tool_output (include ui_event data if present)
|
||||
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
|
||||
|
||||
@@ -18,6 +18,30 @@ from src.tool_utils import _truncate, get_mcp_manager, set_mcp_manager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .subprocess_tools import BashTool, PythonTool
|
||||
from .web_tools import WebSearchTool, WebFetchTool
|
||||
from .filesystem_tools import ReadFileTool, WriteFileTool, EditFileTool, LsTool, GlobTool, GrepTool, GetWorkspaceTool
|
||||
from .document_tools import CreateDocumentTool, UpdateDocumentTool, EditDocumentTool, SuggestDocumentTool, ManageDocumentTool
|
||||
|
||||
# Tool tag -> tool class. One instance of each class is created at import time
# and its bound ``execute`` method becomes the handler for that tag.
_TOOL_CLASSES = (
    ("bash", BashTool),
    ("python", PythonTool),
    ("web_search", WebSearchTool),
    ("web_fetch", WebFetchTool),
    ("read_file", ReadFileTool),
    ("write_file", WriteFileTool),
    ("edit_file", EditFileTool),
    ("ls", LsTool),
    ("glob", GlobTool),
    ("grep", GrepTool),
    ("create_document", CreateDocumentTool),
    ("update_document", UpdateDocumentTool),
    ("edit_document", EditDocumentTool),
    ("suggest_document", SuggestDocumentTool),
    ("manage_documents", ManageDocumentTool),
    ("get_workspace", GetWorkspaceTool),
)

TOOL_HANDLERS = {tag: tool_cls().execute for tag, tool_cls in _TOOL_CLASSES}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Constants (re-exported for backward compatibility — single source of truth
|
||||
# is src.constants; always prefer importing from there for new code)
|
||||
@@ -28,7 +52,7 @@ PYTHON_TIMEOUT = 30
|
||||
|
||||
# Tool types that trigger execution
|
||||
TOOL_TAGS = {"bash", "python", "web_search", "web_fetch", "read_file", "write_file", "edit_file",
|
||||
"grep", "glob", "ls",
|
||||
"grep", "glob", "ls", "get_workspace",
|
||||
"create_document", "update_document", "edit_document",
|
||||
"search_chats",
|
||||
"chat_with_model", "create_session", "list_sessions",
|
||||
@@ -92,15 +116,14 @@ from src.tool_execution import ( # noqa: E402, F401
|
||||
format_tool_result,
|
||||
)
|
||||
|
||||
# Document functions
|
||||
from .document_tools import (
|
||||
set_active_document,
|
||||
set_active_model
|
||||
)
|
||||
|
||||
# Implementations
|
||||
from src.tool_implementations import ( # noqa: E402, F401
|
||||
set_active_document,
|
||||
set_active_model,
|
||||
get_active_document,
|
||||
do_create_document,
|
||||
do_update_document,
|
||||
do_edit_document,
|
||||
do_suggest_document,
|
||||
do_search_chats,
|
||||
do_manage_skills,
|
||||
do_manage_tasks,
|
||||
@@ -108,7 +131,6 @@ from src.tool_implementations import ( # noqa: E402, F401
|
||||
do_manage_mcp,
|
||||
do_manage_webhooks,
|
||||
do_manage_tokens,
|
||||
do_manage_documents,
|
||||
do_manage_settings,
|
||||
do_api_call,
|
||||
)
|
||||
@@ -0,0 +1,644 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
import logging
|
||||
import re
|
||||
import json
|
||||
from src.constants import MAX_READ_CHARS
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active document state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_active_document_id: Optional[str] = None
|
||||
_active_model: Optional[str] = None
|
||||
|
||||
|
||||
def set_active_document(doc_id: Optional[str]) -> None:
    """Record which document the document tools should act on.

    Stores ``doc_id`` in the module-level ``_active_document_id`` pointer;
    passing ``None`` resets the pointer.
    """
    global _active_document_id
    _active_document_id = doc_id
|
||||
|
||||
|
||||
def set_active_model(model: Optional[str]) -> None:
    """Remember the model name used when labelling document versions.

    The value is stored in the module-level ``_active_model`` and is read when
    a version summary such as "Created by <model>" is written.
    """
    global _active_model
    _active_model = model
|
||||
|
||||
|
||||
def get_active_document() -> Optional[str]:
    """Return the ID currently held in the active-document pointer (or None)."""
    return _active_document_id
|
||||
|
||||
|
||||
def clear_active_document(doc_id: Optional[str] = None) -> bool:
    """Reset the in-memory active-document pointer.

    Args:
        doc_id: When given, the pointer is reset only if it currently equals
            this ID, so a different active document is left alone. When
            omitted, the pointer is reset unconditionally.

    Returns:
        True if the pointer was reset, False if it was left untouched.

    Called when a document is detached from its session or deleted (its tab is
    closed): without this, the stale pointer makes the last-resort doc-injection
    path re-surface a closed document in a later, unrelated chat — even one whose
    session no longer matches — because an unlinked doc has session_id NULL (#1160).
    """
    global _active_document_id
    # A specific ID was requested but something else is active: do nothing.
    if doc_id is not None and _active_document_id != doc_id:
        return False
    _active_document_id = None
    return True
|
||||
|
||||
|
||||
def _owned_document_query(query, Document, owner: Optional[str]):
|
||||
if owner is None:
|
||||
# A bare Python `False` is not a valid SQL expression — SQLAlchemy 1.4
|
||||
# deprecates it and 2.0 raises ArgumentError. Use the SQL `false()`
|
||||
# literal to return zero rows for an unscoped (owner-less) query.
|
||||
from sqlalchemy import false
|
||||
return query.filter(false())
|
||||
return query.filter(Document.owner == owner)
|
||||
|
||||
|
||||
def _get_owned_document(db, Document, doc_id: str, owner: Optional[str], active_only: bool = False):
    """Look up one document by ID, scoped to ``owner``.

    Args:
        db: Database session used to build the query.
        Document: The document model class.
        doc_id: ID of the document to fetch.
        owner: Owner the document must belong to; ``None`` matches nothing.
        active_only: When true, only documents flagged ``is_active`` qualify.

    Returns:
        Whatever ``.first()`` yields for the scoped query.
    """
    candidates = db.query(Document).filter(Document.id == doc_id)
    if active_only:
        # `== True` builds a SQL expression here; it is not a Python truth test.
        candidates = candidates.filter(Document.is_active == True)  # noqa: E712
    return _owned_document_query(candidates, Document, owner).first()
|
||||
|
||||
|
||||
def _most_recent_owned_document(db, Document, owner: Optional[str], active_only: bool = False):
    """Fetch the owner's most recently updated document.

    Args:
        db: Database session used to build the query.
        Document: The document model class.
        owner: Owner the document must belong to; ``None`` matches nothing.
        active_only: When true, only documents flagged ``is_active`` qualify.

    Returns:
        Whatever ``.first()`` yields after ordering by ``updated_at`` descending.
    """
    candidates = db.query(Document)
    if active_only:
        # `== True` builds a SQL expression here; it is not a Python truth test.
        candidates = candidates.filter(Document.is_active == True)  # noqa: E712
    scoped = _owned_document_query(candidates, Document, owner)
    return scoped.order_by(Document.updated_at.desc()).first()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document tools — create/update/edit/suggest living documents
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _sniff_doc_language(text: str) -> str:
    """Best-effort detect a document's language from its content when the model
    didn't specify one.

    Defaults to 'markdown' (prose). Recognizes the common markup/code types the
    editor supports so e.g. an SVG isn't saved as markdown. Checks run from the
    least ambiguous signal to the most ambiguous; the first hit wins.

    Args:
        text: Document body; ``None``/empty is treated as prose.

    Returns:
        One of "email", "svg", "xml", "html", "json", "python", "bash",
        "javascript", "sql", "css" or "markdown".
    """
    import json as _json
    import re as _re2

    s = (text or "").strip()
    if not s:
        return "markdown"
    # Markup sniffing only looks at the opening of the document.
    hl = s[:600].lower()
    if _looks_like_email_document(s):
        return "email"
    # Markup (unambiguous)
    if "<svg" in hl:
        return "svg"
    if hl.startswith("<?xml"):
        return "xml"
    if (hl.startswith("<!doctype html") or hl.startswith("<html")
            or _re2.search(r"<(div|body|head|p|span|table|button|h[1-6]|ul|ol|li|img)\b", hl)):
        return "html"
    # JSON: only claimed when the whole document actually parses.
    if s[0] in "{[":
        try:
            _json.loads(s)
            return "json"
        except Exception:
            pass
    # Shebang
    first = s.split("\n", 1)[0].strip().lower()
    if first.startswith("#!"):
        return "python" if "python" in first else "bash"
    # Code by strong leading signals (line-anchored so prose with stray words won't match)
    if _re2.search(r"(?m)^\s*(def \w|class \w|import \w|from \w[\w.]* import )", s):
        return "python"
    if _re2.search(r"(?m)^\s*(function \w|const \w|let \w|export |import .* from )", s):
        return "javascript"
    # UPDATE must be followed by "<table> SET": a bare "update \w" matched any
    # prose line starting with "Update ..." and mislabelled notes as SQL.
    if _re2.search(r"(?mi)^\s*(select .* from |create table |insert into |update\s+\S+\s+set\b)", s):
        return "sql"
    if _re2.search(r"(?m)^[.#]?[\w-]+\s*\{[^{}]*:[^{}]*;", s):
        return "css"
    return "markdown"
|
||||
|
||||
def _looks_like_email_document(text: str = "", title: str = "") -> bool:
|
||||
import re as _re
|
||||
title_l = (title or "").strip().lower()
|
||||
if title_l in {"new email", "new mail", "new message"}:
|
||||
return True
|
||||
s = (text or "").lstrip()
|
||||
if "\n---\n" in s and _re.search(r"(?im)^To:\s*", s) and _re.search(r"(?im)^Subject:\s*", s):
|
||||
return True
|
||||
return bool(_re.search(r"(?im)^To:\s*", s) and _re.search(r"(?im)^Subject:\s*", s))
|
||||
|
||||
def _coerce_email_document_content(existing: str, incoming: str) -> str:
    """Force an email update back into the ``headers / --- / body`` layout.

    Models sometimes send only the body, or header labels with no separator.
    If ``incoming`` already contains the separator it is returned (stripped)
    unchanged. Otherwise the header block is taken from ``existing`` (or a
    blank To/Subject stub) and only the body is taken from ``incoming``.
    """
    import re as _re

    separator = "\n---\n"
    previous = existing or ""
    draft = (incoming or "").strip()
    if separator in draft:
        return draft

    if separator in previous:
        header = previous.split(separator, 1)[0]
    else:
        header = "To: \nSubject: "

    body = draft
    if _looks_like_email_document(draft):
        # The model repeated header labels: keep only what follows the last one.
        header_re = _re.compile(r"^(To|Cc|Bcc|Subject|In-Reply-To|References|X-Source-UID|X-Source-Folder|X-Attachments):", _re.I)
        lines = draft.splitlines()
        last_header_idx = max(
            (idx for idx, line in enumerate(lines) if header_re.match(line.strip())),
            default=-1,
        )
        # strip() also drops any blank lines left between headers and body.
        body = "\n".join(lines[last_header_idx + 1:]).strip()
    return header.rstrip() + "\n---\n" + body
|
||||
|
||||
def _parse_tool_args(content):
|
||||
"""Parse a tool-call argument blob.
|
||||
|
||||
Accepts either a JSON string or an already-decoded dict. Unwraps the
|
||||
common `{"body": {...}}` envelope that smaller models emit when they
|
||||
read tool descriptions like "Body is JSON: {...}" literally — they
|
||||
pass `body` as a field name rather than treating it as a noun.
|
||||
|
||||
Returns a dict on success, raises ValueError on bad JSON.
|
||||
"""
|
||||
if isinstance(content, str):
|
||||
try:
|
||||
args = json.loads(content) if content.strip() else {}
|
||||
except (json.JSONDecodeError, TypeError) as e:
|
||||
raise ValueError(str(e))
|
||||
elif isinstance(content, dict):
|
||||
args = content
|
||||
else:
|
||||
args = {}
|
||||
# Unwrap {"body": {...}} envelope — but only if `body` is the sole key
|
||||
# and points at a dict. We don't want to clobber a legitimate `body`
|
||||
# field on tools where it's a real arg (e.g. send_email body text).
|
||||
if (
|
||||
isinstance(args, dict)
|
||||
and len(args) == 1
|
||||
and "body" in args
|
||||
and isinstance(args["body"], dict)
|
||||
and "action" in args["body"] # extra safety: only unwrap if the inner dict looks like a tool call
|
||||
):
|
||||
args = args["body"]
|
||||
return args
|
||||
|
||||
def parse_edit_blocks(content: str) -> list:
    """Parse <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks.

    Each marker must sit on its own line. The REPLACE section may be empty
    (a deletion), written either with a blank line or with <<<END>>> directly
    on the line after <<<REPLACE>>>.

    Args:
        content: Raw tool payload containing zero or more edit blocks.

    Returns:
        A list of ``{"find": str, "replace": str}`` dicts in document order.
    """
    # The first alternative handles an empty REPLACE section. Without it the
    # lazy group had to run on to a *later* "\n<<<END>>>", so a deletion was
    # either dropped or swallowed the following block.
    pattern = r'<<<FIND>>>\n(.*?)\n<<<REPLACE>>>\n(?:<<<END>>>|(.*?)\n<<<END>>>)'
    return [
        {"find": m.group(1), "replace": m.group(2) or ""}
        for m in re.finditer(pattern, content, re.DOTALL)
    ]
|
||||
|
||||
def parse_suggest_blocks(content: str) -> list:
    """Parse <<<FIND>>>...<<<SUGGEST>>>...<<<REASON>>>...<<<END>>> blocks.

    No-op suggestions are dropped: those whose suggested text equals the found
    text (ignoring surrounding whitespace) and those whose reason says nothing
    needs to change.

    Args:
        content: Raw tool payload containing zero or more suggestion blocks.

    Returns:
        A list of ``{"id", "find", "replace", "reason"}`` dicts. IDs are
        ``sugg-1``, ``sugg-2``, ... numbered over the kept suggestions only.
    """
    # "clear" is matched as a whole word: as a bare substring it also hit
    # "unclear" and "clearer", discarding exactly the suggestions whose reason
    # explains that the original wording is hard to follow.
    noop_reason = re.compile(
        r"no change|\bclear\b|fine as|looks good|no improvement|keep as"
    )
    pattern = r'<<<FIND>>>\n(.*?)\n<<<SUGGEST>>>\n(.*?)\n<<<REASON>>>\n(.*?)\n<<<END>>>'
    suggestions = []
    for m in re.finditer(pattern, content, re.DOTALL):
        find_text = m.group(1)
        replace_text = m.group(2)
        reason = m.group(3).strip()
        # Skip no-op suggestions where find == replace or reason says no change
        if find_text.strip() == replace_text.strip():
            continue
        if noop_reason.search(reason.lower()):
            continue
        suggestions.append({
            "id": f"sugg-{len(suggestions)+1}",
            "find": find_text,
            "replace": replace_text,
            "reason": reason,
        })
    return suggestions
|
||||
|
||||
|
||||
class CreateDocumentTool:
    """Tool handler that creates a new document inside the current chat session."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Create a new document from the model's payload and make it active.

        Two payload layouts are accepted:
          1) Line-based: line 1 = title, line 2 (optional) = language, rest = content
          2) XML-like tags: <title>...</title><language>...</language><content>...</content>
        Some models mix them, so any field the tags did not supply is recovered
        by stripping the tags and falling back to line parsing.

        Args:
            content: Raw tool payload.
            ctx: Call context; ``session_id`` is required, ``owner`` is optional.

        Returns:
            A dict describing the created document (``action``, ``doc_id``,
            ``title``, ``language``, ``content``, ``version``), or
            ``{"error": ...}`` on failure.
        """
        import uuid
        import re as _re
        from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession

        raw = content or ""
        session_id = ctx.get("session_id")
        owner = ctx.get("owner")

        # Known languages the editor understands (match the <select> in HTML)
        known_langs = {
            "python", "javascript", "typescript", "html", "css", "markdown", "json",
            "yaml", "bash", "sql", "rust", "go", "java", "c", "cpp", "xml", "toml",
            "ini", "ruby", "php", "csv", "email", "text", "plain", "svg",
        }

        def _tag(name):
            return _re.search(rf"<{name}>\s*(.*?)\s*</{name}>", raw, _re.DOTALL | _re.IGNORECASE)

        # Try XML tag extraction first. A lone <language> tag is not enough to
        # switch to tag mode; a title or content tag must be present.
        title = language = body = None
        title_m, lang_m, content_m = _tag("title"), _tag("language"), _tag("content")
        if title_m or content_m:
            if title_m:
                title = title_m.group(1).strip()
            if lang_m:
                language = lang_m.group(1).strip().lower()
            if content_m:
                body = content_m.group(1)

        # Fall back to line-based parsing. First strip any stray XML-ish tags.
        if title is None or body is None:
            untagged = _re.sub(r"</?(?:title|language|content)>", "", raw)
            lines = untagged.strip().split("\n")
            if title is None:
                title = lines[0].strip() if lines else "Untitled"
                lines = lines[1:]
            # Only consume second line as language if it looks like a valid short lang token
            if language is None and lines:
                candidate = lines[0].strip().lower()
                if candidate and len(candidate) < 20 and " " not in candidate and candidate in known_langs:
                    language = candidate
                    lines = lines[1:]
            if body is None:
                body = "\n".join(lines)

        # Validate language: must be in known set, else default based on content
        if language and language not in known_langs:
            language = None
        if not language:
            # No explicit language — sniff it from the content so an SVG / HTML / JSON
            # / code document isn't silently saved as markdown. Prose → markdown.
            language = _sniff_doc_language(body)
        # Email shape wins over whatever language was declared or sniffed.
        if _looks_like_email_document(body, title):
            language = "email"

        title = title or "Untitled"

        if not session_id:
            return {"error": "No session context for document creation"}

        db = SessionLocal()
        try:
            doc_id = str(uuid.uuid4())
            ver_id = str(uuid.uuid4())

            # Inherit ownership from the chat session so the doc survives that
            # session later being deleted (session_id → NULL).
            session_row = db.query(DbSession).filter(DbSession.id == session_id).first()
            if owner is not None and (not session_row or session_row.owner != owner):
                return {"error": "Cannot create document in another user's session"}
            doc_owner = session_row.owner if session_row else None

            db.add(Document(
                id=doc_id,
                session_id=session_id,
                title=title,
                language=language,
                current_content=body,
                version_count=1,
                is_active=True,
                owner=doc_owner,
            ))
            db.add(DocumentVersion(
                id=ver_id,
                document_id=doc_id,
                version_number=1,
                content=body,
                summary=f"Created by {_active_model or 'AI'}",
                source="ai",
            ))
            db.commit()

            set_active_document(doc_id)
            # Event dispatch is best-effort: a failure must not undo the create.
            try:
                from src.event_bus import fire_event
                fire_event("document_created", doc_owner)
            except Exception:
                logger.debug("document_created event dispatch failed", exc_info=True)

            return {
                "action": "create",
                "doc_id": doc_id,
                "title": title,
                "language": language,
                "content": body,
                "version": 1,
            }
        except Exception as e:
            db.rollback()
            return {"error": f"Failed to create document: {e}"}
        finally:
            db.close()
|
||||
|
||||
class UpdateDocumentTool:
    """Tool handler that replaces a document's full text with a new version."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Update an existing document. Content = full new document text.

        The target is ``ctx["doc_id"]`` if set, otherwise the active document.
        If that lookup finds nothing owned by the caller, the owner's most
        recently updated document is used instead and becomes the active one.

        Args:
            content: Full replacement text for the document.
            ctx: Call context; reads ``doc_id`` and ``owner``.

        Returns:
            A dict describing the new version (``action``, ``doc_id``,
            ``title``, ``language``, ``content``, ``version``), or
            ``{"error": ...}`` on failure.
        """
        import uuid
        from src.database import SessionLocal, Document, DocumentVersion

        target_id = ctx.get("doc_id", None) or _active_document_id
        owner = ctx.get("owner")

        db = SessionLocal()
        try:
            doc = _get_owned_document(db, Document, target_id, owner) if target_id else None
            if not doc:
                doc = _most_recent_owned_document(db, Document, owner)
                if doc:
                    target_id = doc.id
                    set_active_document(target_id)
                    logger.info(f"update_document: fell back to most recent doc id={target_id}")
            if not doc:
                return {"error": "No documents exist to update"}

            # Email docs keep their To/Subject/---/body shape; everything else
            # is stored as the stripped payload.
            if doc.language == "email" or _looks_like_email_document(doc.current_content or "", doc.title or ""):
                new_content = _coerce_email_document_content(doc.current_content or "", content)
                doc.language = "email"
            else:
                new_content = content.strip()

            next_version = doc.version_count + 1
            version_row = DocumentVersion(
                id=str(uuid.uuid4()),
                document_id=target_id,
                version_number=next_version,
                content=new_content,
                summary=f"Updated by {_active_model or 'AI'}",
                source="ai",
            )
            doc.current_content = new_content
            doc.version_count = next_version
            db.add(version_row)
            db.commit()

            return {
                "action": "update",
                "doc_id": target_id,
                "title": doc.title,
                "language": doc.language,
                "content": new_content,
                "version": next_version,
            }
        except Exception as e:
            db.rollback()
            return {"error": f"Failed to update document: {e}"}
        finally:
            db.close()
|
||||
|
||||
class EditDocumentTool:
    """Tool that applies targeted FIND/REPLACE edits to a stored document."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Apply targeted FIND/REPLACE edits to an existing document.

        Args:
            content: Payload holding one or more
                ``<<<FIND>>>...<<<REPLACE>>>...<<<END>>>`` blocks.
            ctx: Execution context; ``doc_id`` and ``owner`` are read from it.

        Returns:
            On success a dict with ``action``, ``doc_id``, ``title``,
            ``language``, ``content``, ``version``, ``applied`` and
            ``skipped``. Otherwise a dict with a single ``error`` key.
        """
        import uuid
        from src.database import SessionLocal, Document, DocumentVersion

        target_id = ctx.get("doc_id", None) or _active_document_id
        owner = ctx.get("owner")

        edits = parse_edit_blocks(content)
        if not edits:
            return {"error": "No valid <<<FIND>>>...<<<REPLACE>>>...<<<END>>> blocks found"}

        db = SessionLocal()
        try:
            doc = None
            if target_id:
                doc = _get_owned_document(db, Document, target_id, owner)
            if not doc:
                # Fallback: most recently updated document. Avoids "no active doc" errors
                # after server restart or when the agent loses track of which doc to edit.
                doc = _most_recent_owned_document(db, Document, owner)
                if doc:
                    target_id = doc.id
                    set_active_document(target_id)
                    logger.info(f"edit_document: fell back to most recent doc id={target_id} title={doc.title!r}")
            if not doc:
                return {"error": "No documents exist to edit"}

            # A document may have no stored content yet; treat that as empty text
            # so the membership tests below cannot raise TypeError on None.
            updated_content = doc.current_content or ""
            applied = 0
            skipped = 0
            for edit in edits:
                _find = edit["find"]
                if _find in updated_content:
                    updated_content = updated_content.replace(_find, edit["replace"], 1)
                    applied += 1
                    continue
                # Defensive: the active-doc context shows a "N\t" line-number
                # gutter for reference. Weaker models sometimes copy that prefix
                # into FIND. If the exact match failed, retry with a leading
                # "<digits><tab>" stripped from each FIND line — but only use it
                # when that stripped form actually matches, so we never corrupt a
                # legitimately tab-prefixed document.
                _stripped = "\n".join(re.sub(r"^\d+\t", "", _l) for _l in _find.split("\n"))
                if _stripped != _find and _stripped in updated_content:
                    updated_content = updated_content.replace(_stripped, edit["replace"], 1)
                    applied += 1
                    logger.info("edit_document: matched after stripping line-number gutter from FIND")
                else:
                    logger.warning(f"edit_document: FIND text not found, skipping: {_find[:80]!r}")
                    skipped += 1

            if applied == 0:
                return {"error": f"No edits applied — none of the FIND blocks matched the document content (skipped {skipped})"}

            new_ver = doc.version_count + 1
            ver = DocumentVersion(
                id=str(uuid.uuid4()),
                document_id=target_id,
                version_number=new_ver,
                content=updated_content,
                summary=f"Edited by {_active_model or 'AI'} ({applied} edit(s))",
                source="ai",
            )
            doc.current_content = updated_content
            doc.version_count = new_ver
            db.add(ver)
            db.commit()

            return {
                "action": "edit",
                "doc_id": target_id,
                "title": doc.title,
                "language": doc.language,
                "content": updated_content,
                "version": new_ver,
                "applied": applied,
                "skipped": skipped,
            }
        except Exception as e:
            db.rollback()
            return {"error": f"Failed to edit document: {e}"}
        finally:
            db.close()
|
||||
|
||||
class SuggestDocumentTool:
    """Tool that proposes inline suggestions without changing the document."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Create inline suggestions for the active document WITHOUT modifying it.

        Args:
            content: Payload holding one or more
                ``<<<FIND>>>...<<<SUGGEST>>>...<<<REASON>>>...<<<END>>>`` blocks.
            ctx: Execution context; ``doc_id`` and ``owner`` are read from it.

        Returns:
            ``{"action": "suggest", "doc_id", "suggestions", "count"}`` with
            only the suggestions whose FIND text occurs in the document, or a
            dict with a single ``error`` key.
        """
        from src.database import SessionLocal, Document

        target_id = ctx.get("doc_id", None) or _active_document_id
        owner = ctx.get("owner")

        if not target_id:
            return {"error": "No active document to suggest on"}

        suggestions = parse_suggest_blocks(content)
        if not suggestions:
            return {"error": "No valid <<<FIND>>>...<<<SUGGEST>>>...<<<REASON>>>...<<<END>>> blocks found"}

        db = SessionLocal()
        try:
            doc = _get_owned_document(db, Document, target_id, owner)
            if not doc:
                return {"error": f"Document {target_id} not found"}

            # There is no except clause here, so a None body would let a
            # TypeError escape the tool; treat missing content as empty text.
            body = doc.current_content or ""

            # Validate that FIND text exists in document
            valid = []
            for s in suggestions:
                if s["find"] in body:
                    valid.append(s)
                else:
                    logger.warning(f"suggest_document: FIND text not found, skipping: {s['find'][:80]!r}")

            if not valid:
                return {"error": "No suggestions matched the document content"}

            return {
                "action": "suggest",
                "doc_id": target_id,
                "suggestions": valid,
                "count": len(valid),
            }
        finally:
            db.close()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Document management tool (delete, list, organize)
|
||||
# ---------------------------------------------------------------------------
|
||||
class ManageDocumentTool:
    """Tool for listing, reading, deleting and tidying stored documents."""

    async def execute(self, content: str, ctx: dict) -> Dict:
        """Manage documents: list, read/view/open, delete, tidy.

        Output format mirrors `manage_session`: list rows include a
        clickable `[Title](#document-<id>)` anchor + relative timestamps
        so the user can click straight from chat to open the editor.

        Args:
            content: Tool arguments, parsed by ``_parse_tool_args``. ``action``
                selects the operation and defaults to ``"list"``.
            ctx: Execution context; ``owner`` is read from it.

        Returns:
            A dict with ``exit_code`` plus either ``response`` (and
            action-specific data) or ``error``.
        """
        # NOTE(review): the sibling document tools import from src.database;
        # confirm that core.database is the intended module here.
        from core.database import SessionLocal, Document
        from datetime import datetime, timezone

        owner = ctx.get("owner")

        try:
            args = _parse_tool_args(content)
        except ValueError:
            return {"error": "Invalid JSON arguments", "exit_code": 1}

        action = args.get("action", "list")
        db = SessionLocal()

        def _rel(ts):
            """Render a timestamp as a short relative age ('5m ago', ...)."""
            if not ts:
                return 'never'
            try:
                # Compare like with like: aware timestamps against aware "now",
                # naive ones against naive UTC (datetime.utcnow() is deprecated).
                now = datetime.now(timezone.utc)
                if ts.tzinfo is None:
                    now = now.replace(tzinfo=None)
                diff = (now - ts).total_seconds()
            except Exception:
                return 'unknown'
            if diff < 60:
                return 'just now'
            if diff < 3600:
                return f'{int(diff / 60)}m ago'
            if diff < 86400:
                return f'{int(diff / 3600)}h ago'
            if diff < 86400 * 7:
                return f'{int(diff / 86400)}d ago'
            return ts.strftime('%Y-%m-%d')

        def _limit(default):
            """Return the caller's ``limit`` as a positive int, else ``default``.

            Guards against null / non-numeric values and against negative
            numbers, which would otherwise slice the preview from the end.
            """
            try:
                value = int(args.get("limit", default))
            except (TypeError, ValueError):
                return default
            return value if value > 0 else default

        try:
            if action == "list":
                q = db.query(Document).filter(Document.is_active == True)
                q = _owned_document_query(q, Document, owner)
                if args.get("search"):
                    q = q.filter(Document.title.ilike(f"%{args['search']}%"))
                if args.get("language"):
                    q = q.filter(Document.language == args["language"])
                docs = q.order_by(Document.updated_at.desc()).limit(_limit(50)).all()
                if not docs:
                    msg = "No documents found" + (f" matching '{args['search']}'" if args.get("search") else "") + "."
                    return {"response": msg, "documents": [], "exit_code": 0}
                lines = []
                items = []
                for i, d in enumerate(docs):
                    size = len(d.current_content or "")
                    lang = d.language or "text"
                    ts = getattr(d, 'updated_at', None) or getattr(d, 'created_at', None)
                    marker = " ← most recent" if i == 0 else ""
                    lines.append(
                        f"- [{d.title}](#document-{d.id}) — {lang}, {size} chars, updated {_rel(ts)}{marker}"
                    )
                    items.append({"id": d.id, "title": d.title, "language": lang, "size": size})
                header = f"Found {len(docs)} document(s), sorted most-recent first. Click a title to open:"
                return {
                    "response": header + "\n" + "\n".join(lines),
                    "documents": items,
                    "exit_code": 0,
                }

            elif action in ("read", "view", "open", "get"):
                doc_id = args.get("document_id") or args.get("id") or args.get("uid")
                if not doc_id:
                    return {"error": "Need document_id (use action=list to find one)", "exit_code": 1}
                doc = _get_owned_document(db, Document, doc_id, owner, active_only=True)
                if not doc:
                    return {"error": f"Document '{doc_id}' not found", "exit_code": 1}
                body = doc.current_content or ""
                preview_limit = _limit(MAX_READ_CHARS)
                truncated = len(body) > preview_limit
                preview = body[:preview_limit] + (f"\n... (truncated, {len(body)} chars total)" if truncated else "")
                anchor = f"[{doc.title}](#document-{doc.id})"
                return {
                    "response": f"{anchor} — click to open in editor.\n\n```{doc.language or ''}\n{preview}\n```",
                    "document": {
                        "id": doc.id,
                        "title": doc.title,
                        "language": doc.language,
                        "size": len(body),
                        "content": preview,
                        "truncated": truncated,
                    },
                    "exit_code": 0,
                }

            elif action == "delete":
                doc_id = args.get("document_id") or args.get("id") or args.get("uid") or _active_document_id
                doc = None
                if doc_id:
                    doc = _get_owned_document(db, Document, doc_id, owner)
                if not doc:
                    # Fallback: most recently updated doc (likely what the user means)
                    # NOTE(review): this also fires when an explicit id was given but
                    # did not resolve, so a mistyped id soft-deletes a different
                    # document. Confirm this is intended.
                    doc = _most_recent_owned_document(db, Document, owner, active_only=True)
                if not doc:
                    return {"error": "No document to delete", "exit_code": 1}
                title = doc.title
                # Soft delete: the row is kept and only flagged inactive.
                doc.is_active = False
                db.commit()
                if _active_document_id == doc.id:
                    set_active_document(None)
                return {"response": f"Deleted document '{title}'", "exit_code": 0}

            elif action == "tidy":
                from src.document_actions import run_document_tidy
                result = await run_document_tidy(owner or "")
                return {"response": result, "exit_code": 0}

            else:
                return {"error": f"Unknown action: {action}", "exit_code": 1}
        except Exception as e:
            logger.error(f"manage_documents error: {e}")
            return {"error": str(e), "exit_code": 1}
        finally:
            db.close()
|
||||
@@ -0,0 +1,398 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import difflib
|
||||
import fnmatch
|
||||
import shutil
|
||||
from typing import Optional, Dict, Any, Tuple
|
||||
|
||||
from src.constants import MAX_READ_CHARS, MAX_DIFF_LINES, MAX_OUTPUT_CHARS
|
||||
|
||||
# Directory names that the glob and grep tools exclude from their results
# (VCS metadata, dependency trees, caches and build output).
_CODENAV_SKIP_DIRS = frozenset((
    ".git",
    ".hg",
    ".svn",
    "node_modules",
    "venv",
    ".venv",
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    "dist",
    "build",
    ".next",
    ".cache",
    "site-packages",
    ".idea",
    ".tox",
))
# Upper bound on entries/matches returned by the ls, glob and grep tools.
_CODENAV_MAX_HITS = 200
# Maximum number of characters kept from a single grep result line.
_CODENAV_MAX_LINE = 400
|
||||
|
||||
def _unified_diff(old: str, new: str, path: str) -> Optional[Dict[str, Any]]:
    """Build a unified diff between two versions of a file's text.

    Args:
        old: Previous file contents ("" for a file that did not exist).
        new: New file contents.
        path: File path used for the diff labels; falls back to "file".

    Returns:
        ``None`` when the two texts are identical. Otherwise a dict with
        ``text`` (the diff, cut at ``MAX_DIFF_LINES`` lines), ``added`` and
        ``removed`` line counts (taken over the full, untruncated diff),
        ``new_file`` (``old`` was empty) and ``file`` (base name of ``path``).
    """
    if old == new:
        return None
    label = path or "file"
    diff_lines = list(difflib.unified_diff(
        old.splitlines(), new.splitlines(),
        fromfile=f"a/{label}", tofile=f"b/{label}",
        lineterm="",
    ))
    # The first two lines are the "---"/"+++" file headers. Skip them by
    # position rather than by prefix: an added line whose content starts with
    # "++" renders as "+++..." (and a removed "--..." line as "---..."), so a
    # prefix test would silently drop real changes from the counts.
    changes = diff_lines[2:]
    added = sum(1 for line in changes if line.startswith("+"))
    removed = sum(1 for line in changes if line.startswith("-"))
    truncated = len(diff_lines) > MAX_DIFF_LINES
    if truncated:
        diff_lines = diff_lines[:MAX_DIFF_LINES]
    text = "\n".join(diff_lines)
    if truncated:
        text += f"\n… diff truncated at {MAX_DIFF_LINES} lines"
    return {
        "text": text,
        "added": added,
        "removed": removed,
        "new_file": old == "",
        # basename is empty for paths ending in a separator; use the label then.
        "file": os.path.basename(label) or label,
    }
|
||||
|
||||
class EditFileTool:
    """Tool that replaces an exact substring inside an existing text file."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Replace ``old_string`` with ``new_string`` in the file at ``path``.

        Args:
            content: JSON object with ``path``, ``old_string``, ``new_string``
                and optional ``replace_all``.
            ctx: Execution context (not used by this tool).

        Returns:
            ``{"output", "exit_code": 0}`` plus a ``diff`` entry when the text
            changed, or ``{"error", "exit_code": 1}``. Without ``replace_all``
            the edit is refused unless ``old_string`` occurs exactly once.
        """
        from src.tool_execution import _resolve_tool_path

        text = (content or "").strip()
        try:
            args = json.loads(text) if text.startswith("{") else {}
        except (json.JSONDecodeError, TypeError):
            args = {}
        raw_path = args.get("path") or ""
        old = args.get("old_string", "")
        new = args.get("new_string", "")
        replace_all = bool(args.get("replace_all", False))
        # Arguments come from a model: a non-string path would otherwise raise
        # AttributeError on .strip() instead of producing a tool error.
        if not isinstance(raw_path, str) or not raw_path.strip():
            return {"error": "edit_file: path required", "exit_code": 1}
        try:
            path = _resolve_tool_path(raw_path.strip())
        except ValueError as e:
            return {"error": f"edit_file: {e}", "exit_code": 1}
        # JSON null / numbers would reach str.count()/str.replace() and raise a
        # TypeError that none of the handlers below catch.
        if not isinstance(old, str) or not isinstance(new, str):
            return {"error": "edit_file: old_string and new_string must be strings", "exit_code": 1}
        if old == "":
            return {"error": "edit_file: old_string required (use write_file to create a file)", "exit_code": 1}
        if old == new:
            return {"error": "edit_file: old_string and new_string are identical", "exit_code": 1}

        def _apply():
            """Read the file, perform the replacement and write it back.

            Returns ``(original, updated, count)``; ``updated`` is ``None``
            when nothing was written (no match, or ambiguous match).
            """
            with open(path, "r", encoding="utf-8") as f:
                original = f.read()
            count = original.count(old)
            if count == 0 or (count > 1 and not replace_all):
                return original, None, count
            updated = original.replace(old, new) if replace_all else original.replace(old, new, 1)
            with open(path, "w", encoding="utf-8") as f:
                f.write(updated)
            return original, updated, count

        try:
            # File I/O is blocking, so run it off the event loop.
            original, updated, count = await asyncio.to_thread(_apply)
        except FileNotFoundError:
            return {"error": f"edit_file: {path}: not found (use write_file to create it)", "exit_code": 1}
        except (IsADirectoryError, UnicodeDecodeError):
            return {"error": f"edit_file: {path}: not an editable text file", "exit_code": 1}
        except PermissionError:
            return {"error": f"edit_file: {path}: permission denied", "exit_code": 1}
        except OSError as e:
            return {"error": f"edit_file: {path}: {e}", "exit_code": 1}

        if updated is None:
            if count == 0:
                return {"error": f"edit_file: old_string not found in {path}. Read the file and match it exactly.", "exit_code": 1}
            return {"error": f"edit_file: old_string is not unique in {path} ({count} matches). Add surrounding context or set replace_all=true.", "exit_code": 1}

        # When replace_all is off, count is necessarily 1 at this point.
        result = {"output": f"Edited {path} ({count} replacement{'s' if count != 1 else ''})", "exit_code": 0}
        diff = _unified_diff(original, updated, path)
        if diff:
            result["diff"] = diff
        return result
|
||||
|
||||
class ReadFileTool:
    """Tool that returns the text of a file, optionally a window of lines."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Read a file and return its (possibly truncated) text.

        ``content`` is either a bare path on its first line or a JSON object
        with ``path`` and optional 1-based ``offset`` / ``limit`` line window.
        Output is capped at ``MAX_READ_CHARS`` characters in both modes.
        """
        from src.tool_execution import _resolve_tool_path, _resolve_search_root, _truncate

        raw_path = content.split("\n", 1)[0].strip()
        offset = 0
        limit = 0
        payload = content.strip()
        if payload.startswith("{"):
            try:
                parsed = json.loads(payload)
                raw_path = str(parsed.get("path", "")).strip()
                offset = int(parsed.get("offset") or 0)
                limit = int(parsed.get("limit") or 0)
            except (json.JSONDecodeError, TypeError, ValueError):
                # Keep whatever was assigned before the failure.
                pass

        try:
            path = _resolve_tool_path(raw_path)
        except ValueError as e:
            return {"error": f"read_file: {e}", "exit_code": 1}

        windowed = offset > 0 or limit > 0

        def _read_window():
            # Line-window mode: skip to the first requested line, then collect
            # lines until the line limit or the character budget runs out.
            first_line = max(offset, 1)
            pieces = []
            taken = 0
            remaining = MAX_READ_CHARS
            with open(path, "r", encoding="utf-8", errors="replace") as fh:
                for lineno, text in enumerate(fh, 1):
                    if lineno < first_line:
                        continue
                    if limit > 0 and taken >= limit:
                        break
                    pieces.append(text)
                    taken += 1
                    remaining -= len(text)
                    if remaining <= 0:
                        pieces.append(f"\n... [truncated at {MAX_READ_CHARS} chars]")
                        break
            return "".join(pieces)

        def _read_head():
            # Whole-file mode: read one char beyond the cap so the caller can
            # tell that the file was longer than the cap.
            with open(path, "r", encoding="utf-8", errors="replace") as fh:
                return fh.read(MAX_READ_CHARS + 1)

        try:
            data = await asyncio.to_thread(_read_window if windowed else _read_head)
        except FileNotFoundError:
            return {"error": f"read_file: {path}: not found", "exit_code": 1}
        except PermissionError:
            return {"error": f"read_file: {path}: permission denied", "exit_code": 1}
        except IsADirectoryError:
            return {"error": f"read_file: {path}: is a directory (use ls)", "exit_code": 1}
        except OSError as e:
            return {"error": f"read_file: {path}: {e}", "exit_code": 1}

        if not windowed and len(data) > MAX_READ_CHARS:
            data = data[:MAX_READ_CHARS] + f"\n... [truncated at {MAX_READ_CHARS} chars]"
        return {"output": data, "exit_code": 0}
|
||||
|
||||
class WriteFileTool:
    """Tool that creates or overwrites a text file."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Write a file whose path is the first line of ``content``.

        Everything after the first newline is written verbatim as UTF-8.
        Missing parent directories are created.

        Returns:
            ``{"output", "exit_code": 0}`` plus a ``diff`` entry against the
            previous contents when they differ, or ``{"error", "exit_code": 1}``.
        """
        from src.tool_execution import _resolve_tool_path

        raw_path, _, body = content.partition("\n")
        try:
            path = _resolve_tool_path(raw_path.strip())
        except ValueError as e:
            return {"error": f"write_file: {e}", "exit_code": 1}

        def _write():
            """Capture the old text (if readable), then write the new body."""
            try:
                with open(path, "r", encoding="utf-8") as f:
                    old = f.read()
            except (OSError, UnicodeDecodeError):
                # Missing, unreadable or non-text target: diff against empty.
                old = ""
            d = os.path.dirname(path)
            if d:
                os.makedirs(d, exist_ok=True)
            with open(path, "w", encoding="utf-8") as f:
                f.write(body)
            # Report the encoded size: len(body) counts characters, which
            # under-reports the byte count for any non-ASCII text.
            return old, len(body.encode("utf-8"))

        try:
            # File I/O is blocking, so run it off the event loop.
            old_content, size = await asyncio.to_thread(_write)
        except PermissionError:
            return {"error": f"write_file: {path}: permission denied", "exit_code": 1}
        except OSError as e:
            return {"error": f"write_file: {path}: {e}", "exit_code": 1}

        diff = _unified_diff(old_content, body, path)
        result = {"output": f"Wrote {size} bytes to {path}", "exit_code": 0}
        if diff:
            result["diff"] = diff
        return result
|
||||
|
||||
class LsTool:
    """Tool that lists the non-hidden entries of a directory."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """List a directory, directories first, then files with their sizes.

        ``content`` is a bare path on its first line or a JSON object with an
        optional ``path``. Entries whose name starts with "." are omitted and
        at most ``_CODENAV_MAX_HITS`` rows are shown.
        """
        from src.tool_execution import _resolve_search_root, _truncate

        raw_path = ""
        _s = (content or "").strip()
        if _s.startswith("{"):
            try:
                # `or ""` keeps an explicit JSON null from turning into the
                # literal path "None" via str().
                raw_path = str(json.loads(_s).get("path") or "").strip()
            except json.JSONDecodeError:
                raw_path = ""
        else:
            raw_path = _s.split("\n", 1)[0].strip()
        try:
            root = _resolve_search_root(raw_path)
        except ValueError as e:
            return {"error": f"ls: {e}", "exit_code": 1}

        def _ls():
            """Return ``(listing, None)`` on success or ``(None, error)``."""
            if not os.path.isdir(root):
                return None, f"ls: {root}: not a directory"
            rows = []
            try:
                with os.scandir(root) as it:
                    for entry in it:
                        if entry.name.startswith("."):
                            continue
                        try:
                            is_dir = entry.is_dir(follow_symlinks=False)
                            size = entry.stat(follow_symlinks=False).st_size if not is_dir else 0
                        except OSError:
                            # Entry vanished or is unreadable: leave it out.
                            continue
                        rows.append((is_dir, entry.name, size))
            except (PermissionError, OSError) as _e:
                return None, f"ls: {_e}"
            # Directories first, then case-insensitive by name.
            rows.sort(key=lambda r: (not r[0], r[1].lower()))
            lines = [f"{root}:"]
            for is_dir, name, size in rows[:_CODENAV_MAX_HITS]:
                lines.append(f" {name}/" if is_dir else f" {name} ({size} B)")
            if len(rows) > _CODENAV_MAX_HITS:
                lines.append(f" ... [{len(rows) - _CODENAV_MAX_HITS} more]")
            if not rows:
                lines.append(" (empty)")
            return "\n".join(lines), None

        # Directory scanning is blocking, so run it off the event loop.
        out, err = await asyncio.to_thread(_ls)
        if err:
            return {"error": err, "exit_code": 1}
        return {"output": _truncate(out), "exit_code": 0}
|
||||
|
||||
class GlobTool:
    """Tool that finds files by glob pattern, newest first."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Recursively match ``pattern`` under the search root.

        ``content`` is a bare pattern or a JSON object with ``pattern`` and an
        optional ``path``. Matches inside ``_CODENAV_SKIP_DIRS`` are dropped;
        results are sorted by modification time (newest first) and capped at
        ``_CODENAV_MAX_HITS``.
        """
        from src.tool_execution import _resolve_search_root, _truncate

        args = {}
        _s = (content or "").strip()
        if _s.startswith("{"):
            try:
                args = json.loads(_s)
            except json.JSONDecodeError:
                args = {}
        else:
            args = {"pattern": _s}
        pattern = str(args.get("pattern", "")).strip()
        if not pattern:
            return {"error": "glob: pattern is required", "exit_code": 1}
        try:
            # `or ""` keeps an explicit JSON null from becoming the path "None".
            root = _resolve_search_root(str(args.get("path") or ""))
        except ValueError as e:
            return {"error": f"glob: {e}", "exit_code": 1}

        def _glob():
            """Return ``(paths, None)`` on success or ``(None, error)``."""
            from pathlib import Path
            base = Path(root)
            if not base.is_dir():
                return None, f"glob: {root}: not a directory"
            matched = []
            try:
                for p in base.rglob(pattern):
                    if set(p.relative_to(base).parts) & _CODENAV_SKIP_DIRS:
                        continue
                    try:
                        mtime = p.stat().st_mtime
                    except OSError:
                        mtime = 0
                    matched.append((mtime, str(p)))
                    # Bound the scan; sorting then happens over this sample.
                    if len(matched) > _CODENAV_MAX_HITS * 5:
                        break
            except (OSError, ValueError, NotImplementedError) as _e:
                # pathlib rejects absolute patterns with NotImplementedError
                # and unusable ones with ValueError; report both as tool errors.
                return None, f"glob: {_e}"
            matched.sort(key=lambda t: t[0], reverse=True)
            return [pth for _, pth in matched[:_CODENAV_MAX_HITS]], None

        # Filesystem traversal is blocking, so run it off the event loop.
        paths, err = await asyncio.to_thread(_glob)
        if err:
            return {"error": err, "exit_code": 1}
        if not paths:
            return {"output": f"No files matching {pattern!r} under {root}", "exit_code": 0}
        out = "\n".join(paths)
        if len(paths) >= _CODENAV_MAX_HITS:
            out += f"\n... [capped at {_CODENAV_MAX_HITS} files]"
        return {"output": _truncate(out), "exit_code": 0}
|
||||
|
||||
class GrepTool:
    """Tool that searches file contents for a regular expression."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Search for ``pattern`` under the search root.

        ``content`` is a bare pattern or a JSON object with ``pattern`` and
        optional ``path``, ``glob``, ``ignore_case`` and ``max_results``.
        ripgrep is used when it is on PATH; otherwise files are scanned with
        Python's ``re``. Results are ``path:line:text`` rows, capped at
        ``max_results`` (never more than ``_CODENAV_MAX_HITS``).
        """
        from src.tool_execution import _resolve_search_root, _truncate

        args: Dict[str, Any] = {}
        _s = (content or "").strip()
        if _s.startswith("{"):
            try:
                args = json.loads(_s)
            except json.JSONDecodeError:
                args = {}
        else:
            args = {"pattern": _s}
        pattern = str(args.get("pattern", "")).strip()
        if not pattern:
            return {"error": "grep: pattern is required", "exit_code": 1}
        ignore_case = bool(args.get("ignore_case"))
        glob_pat = str(args.get("glob", "") or "").strip()
        try:
            max_hits = int(args.get("max_results") or _CODENAV_MAX_HITS)
        except (TypeError, ValueError):
            max_hits = _CODENAV_MAX_HITS
        max_hits = max(1, min(max_hits, _CODENAV_MAX_HITS))
        try:
            # `or ""` keeps an explicit JSON null from becoming the path "None".
            root = _resolve_search_root(str(args.get("path") or ""))
        except ValueError as e:
            return {"error": f"grep: {e}", "exit_code": 1}

        def _grep():
            """Return ``(lines, None)`` on success or ``(None, error)``."""
            import re as _re
            import shutil
            import subprocess
            rg = shutil.which("rg")
            if rg:
                cmd = [rg, "--line-number", "--no-heading", "--color=never",
                       "--max-count", str(max_hits)]
                if ignore_case:
                    cmd.append("--ignore-case")
                if glob_pat:
                    cmd += ["--glob", glob_pat]
                for _d in _CODENAV_SKIP_DIRS:
                    cmd += ["--glob", f"!**/{_d}/**"]
                # --regexp keeps a pattern that starts with "-" from being
                # parsed as an option.
                cmd += ["--regexp", pattern, root]
                try:
                    p = subprocess.run(cmd, capture_output=True, text=True, timeout=20)
                except subprocess.TimeoutExpired:
                    return None, "grep: timed out"
                except Exception as _e:
                    return None, f"grep: {_e}"
                # --max-count is per file, so cap the combined output as well.
                lines = [ln for ln in (p.stdout or "").splitlines() if ln][:max_hits]
                if not lines and p.returncode not in (0, 1):
                    # ripgrep exits 0 on a match and 1 on no match; anything
                    # else is a failure (e.g. an invalid regex) and must not
                    # be reported as "no matches".
                    detail = (p.stderr or "").strip().splitlines()
                    reason = detail[0][:_CODENAV_MAX_LINE] if detail else f"ripgrep exited with status {p.returncode}"
                    return None, f"grep: {reason}"
                return lines, None
            try:
                rx = _re.compile(pattern, _re.IGNORECASE if ignore_case else 0)
            except _re.error as _e:
                return None, f"grep: bad pattern: {_e}"
            hits = []
            if os.path.isfile(root):
                file_iter = [root]
            else:
                file_iter = []
                for dp, dns, fns in os.walk(root):
                    # Prune skipped directories in place so os.walk never
                    # descends into them.
                    dns[:] = [d for d in dns if d not in _CODENAV_SKIP_DIRS]
                    for fn in fns:
                        if glob_pat and not fnmatch.fnmatch(fn, glob_pat):
                            continue
                        file_iter.append(os.path.join(dp, fn))
            for fp in file_iter:
                if len(hits) >= max_hits:
                    break
                try:
                    with open(fp, "r", encoding="utf-8", errors="strict") as f:
                        for i, line in enumerate(f, 1):
                            if rx.search(line):
                                hits.append(f"{fp}:{i}:{line.rstrip()[:_CODENAV_MAX_LINE]}")
                                if len(hits) >= max_hits:
                                    break
                except (UnicodeDecodeError, OSError):
                    # Binary or unreadable file: skip it.
                    continue
            return hits, None

        # Searching is blocking, so run it off the event loop.
        lines, err = await asyncio.to_thread(_grep)
        if err:
            return {"error": err, "exit_code": 1}
        if not lines:
            return {"output": f"No matches for {pattern!r} under {root}", "exit_code": 0}
        out = "\n".join(ln[:_CODENAV_MAX_LINE] for ln in lines)
        if len(lines) >= max_hits:
            out += f"\n... [capped at {max_hits} matches]"
        return {"output": _truncate(out), "exit_code": 0}
|
||||
|
||||
class GetWorkspaceTool:
    """Tell the caller which workspace folder is active (takes no arguments).

    File tools are confined to that folder; the shell merely starts there
    (cwd) and is NOT sandboxed.
    """

    async def execute(self, content: str, ctx: dict) -> dict:
        """Return the active workspace path, or a notice that none is set."""
        from src.tool_execution import get_active_workspace

        workspace = get_active_workspace()
        if not workspace:
            message = (
                "No workspace is set. File tools use the default allowed roots; "
                "resolve paths from the user or use absolute paths."
            )
        else:
            message = (
                f"{workspace}\n(File tools are confined to this folder; the shell starts "
                f"here but is not sandboxed and can reach outside it.)"
            )
        return {"output": message, "exit_code": 0}
|
||||
@@ -0,0 +1,153 @@
|
||||
import asyncio
|
||||
import sys
|
||||
import time
|
||||
import collections
|
||||
from typing import Optional, Callable, Awaitable, Tuple, Dict
|
||||
from src.constants import MAX_OUTPUT_CHARS
|
||||
|
||||
DEFAULT_BASH_TIMEOUT = 60 * 60  # 1 hour
DEFAULT_PYTHON_TIMEOUT = 60 * 60

# Seconds between progress callbacks, and how many recent output lines each
# callback receives.
PROGRESS_INTERVAL_S = 2.0
PROGRESS_TAIL_LINES = 12


async def _run_subprocess_streaming(
    proc: asyncio.subprocess.Process,
    *,
    timeout: float,
    progress_cb: Optional[Callable[[Dict], Awaitable[None]]] = None,
) -> Tuple[str, str, Optional[int], bool]:
    """Wait for a subprocess while draining its stdout/stderr concurrently.

    Args:
        proc: Running process, created with piped stdout/stderr (a missing
            pipe is tolerated and yields an empty string).
        timeout: Seconds to wait before the process is killed.
        progress_cb: Optional async callback invoked every
            ``PROGRESS_INTERVAL_S`` seconds with ``{"elapsed_s", "tail"}``,
            where ``tail`` holds the last ``PROGRESS_TAIL_LINES`` output lines
            (stderr lines prefixed with "! "). Callback errors are ignored.

    Returns:
        ``(stdout, stderr, returncode, timed_out)``. Output lines are joined
        with "\\n", without a trailing newline.

    Raises:
        asyncio.CancelledError: re-raised after the process has been killed
            and the helper tasks cancelled.
    """
    started = time.time()
    stdout_full: list[str] = []
    stderr_full: list[str] = []
    tail = collections.deque(maxlen=PROGRESS_TAIL_LINES)
    read_chunk = 65536

    def _record(raw: bytes, full_buf, label: str) -> None:
        decoded = raw.decode("utf-8", errors="replace")
        full_buf.append(decoded)
        tail.append(f"! {decoded}" if label == "err" else decoded)

    async def _reader(stream, full_buf, label: str):
        if stream is None:
            return
        # Read fixed-size chunks and split lines here. StreamReader.readline()
        # raises ValueError once a single line exceeds the stream limit
        # (64 KiB by default), which used to kill this task, drop output and
        # leave the pipe undrained.
        fragments = []  # pieces of the current, not yet terminated line
        while True:
            chunk = await stream.read(read_chunk)
            if not chunk:
                break
            head, sep, rest = chunk.partition(b"\n")
            if not sep:
                fragments.append(chunk)
                continue
            fragments.append(head)
            _record(b"".join(fragments), full_buf, label)
            *complete, last = rest.split(b"\n")
            for raw in complete:
                _record(raw, full_buf, label)
            fragments = [last] if last else []
        if fragments:
            # Final line without a trailing newline.
            _record(b"".join(fragments), full_buf, label)

    async def _progress_emitter():
        await asyncio.sleep(PROGRESS_INTERVAL_S)
        while True:
            if progress_cb:
                try:
                    await progress_cb({
                        "elapsed_s": round(time.time() - started, 1),
                        "tail": "\n".join(list(tail)),
                    })
                except Exception:
                    # Progress reporting is best-effort; never fail the run.
                    pass
            await asyncio.sleep(PROGRESS_INTERVAL_S)

    rd_out = asyncio.create_task(_reader(proc.stdout, stdout_full, "out"))
    rd_err = asyncio.create_task(_reader(proc.stderr, stderr_full, "err"))
    prog_task = asyncio.create_task(_progress_emitter()) if progress_cb else None

    timed_out = False
    try:
        await asyncio.wait_for(proc.wait(), timeout=timeout)
    except asyncio.TimeoutError:
        timed_out = True
        try:
            proc.kill()
        except Exception:
            pass
        try:
            # Give the killed process a moment to be reaped.
            await asyncio.wait_for(proc.wait(), timeout=2)
        except Exception:
            pass
    except asyncio.CancelledError:
        try:
            proc.kill()
        except Exception:
            pass
        try:
            await asyncio.wait_for(proc.wait(), timeout=2)
        except Exception:
            pass
        for t in (rd_out, rd_err):
            t.cancel()
        if prog_task is not None:
            prog_task.cancel()
        raise
    finally:
        if prog_task is not None and not prog_task.done():
            prog_task.cancel()
            try:
                await prog_task
            except (asyncio.CancelledError, Exception):
                pass
        for t in (rd_out, rd_err):
            try:
                # Bounded wait: a surviving child may still hold the pipe open.
                await asyncio.wait_for(t, timeout=1)
            except Exception:
                pass

    return (
        "\n".join(stdout_full),
        "\n".join(stderr_full),
        proc.returncode,
        timed_out,
    )
|
||||
|
||||
class BashTool:
    """Tool that runs a shell command in the agent's working directory."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Run ``content`` through the shell and return its combined output.

        ``ctx`` may supply ``progress_cb`` (progress callback) and
        ``subproc_env`` (environment for the child). stderr, when present, is
        appended after a "STDERR: " marker. A run that exceeds
        ``DEFAULT_BASH_TIMEOUT`` is killed and reported with exit code 124.
        """
        from src.tool_execution import agent_cwd, _truncate

        on_progress = ctx.get("progress_cb")
        child_env = ctx.get("subproc_env")
        proc = await asyncio.create_subprocess_shell(
            content,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=child_env,
            cwd=agent_cwd(),
        )
        stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
            proc,
            timeout=DEFAULT_BASH_TIMEOUT,
            progress_cb=on_progress,
        )
        if timed_out:
            return {
                "error": f"bash: timed out after {DEFAULT_BASH_TIMEOUT}s — process killed",
                "exit_code": 124,
                "stdout": _truncate(stdout, MAX_OUTPUT_CHARS),
                "stderr": _truncate(stderr, MAX_OUTPUT_CHARS),
            }

        combined = stdout.rstrip()
        problems = stderr.rstrip()
        if problems and combined:
            combined = (combined + "\nSTDERR: " + problems).strip()
        elif problems:
            combined = "STDERR: " + problems
        combined = _truncate(combined, MAX_OUTPUT_CHARS)
        if not combined:
            combined = "(no output)"
        return {"output": combined, "exit_code": rc if rc else 0}
|
||||
|
||||
class PythonTool:
    """Tool that runs a Python snippet in a separate interpreter process."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Execute ``content`` with ``python -I -c`` and return its output.

        ``ctx`` may supply ``progress_cb`` (progress callback) and
        ``subproc_env`` (environment for the child). stderr, when present, is
        appended after a "STDERR: " marker. A run that exceeds
        ``DEFAULT_PYTHON_TIMEOUT`` is killed and reported with exit code 124.
        """
        from src.tool_execution import agent_cwd, _truncate

        on_progress = ctx.get("progress_cb")
        child_env = ctx.get("subproc_env")
        interpreter = sys.executable or "python"
        proc = await asyncio.create_subprocess_exec(
            interpreter, "-I", "-c", content,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            env=child_env,
            cwd=agent_cwd(),
        )
        stdout, stderr, rc, timed_out = await _run_subprocess_streaming(
            proc,
            timeout=DEFAULT_PYTHON_TIMEOUT,
            progress_cb=on_progress,
        )
        if timed_out:
            return {
                "error": f"python: timed out after {DEFAULT_PYTHON_TIMEOUT}s — process killed",
                "exit_code": 124,
                "stdout": _truncate(stdout, MAX_OUTPUT_CHARS),
                "stderr": _truncate(stderr, MAX_OUTPUT_CHARS),
            }

        combined = stdout.rstrip()
        problems = stderr.rstrip()
        if problems and combined:
            combined = (combined + "\nSTDERR: " + problems).strip()
        elif problems:
            combined = "STDERR: " + problems
        combined = _truncate(combined, MAX_OUTPUT_CHARS)
        if not combined:
            combined = "(no output)"
        return {"output": combined, "exit_code": rc if rc else 0}
|
||||
@@ -0,0 +1,101 @@
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Dict, Any
|
||||
|
||||
from src.constants import MAX_OUTPUT_CHARS
|
||||
|
||||
class WebSearchTool:
    """Tool that runs a web search and returns the aggregated text."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Search the web for a query.

        ``content`` is a bare query or a JSON object with ``query`` and
        optional ``time_filter`` / ``freshness`` ("day", "week", "month",
        "year") and ``max_pages`` (1-10, default 5). Without an explicit
        filter, recency keywords in the query select one.

        Returns:
            ``{"output", "exit_code": 0}`` where ``output`` is the result text
            cut at ``MAX_OUTPUT_CHARS`` followed, when sources exist, by a
            ``<!-- SOURCES:... -->`` JSON trailer; or ``{"error", "exit_code": 1}``.
        """
        from src.search import comprehensive_web_search

        raw = (content or "").strip()
        query = raw
        time_filter = None
        max_pages = 5
        if raw.startswith("{"):
            try:
                parsed = json.loads(raw)
                if isinstance(parsed, dict) and "query" in parsed:
                    query = str(parsed.get("query", "")).strip()
                    tf = parsed.get("time_filter") or parsed.get("freshness")
                    if isinstance(tf, str) and tf.lower() in ("day", "week", "month", "year"):
                        time_filter = tf.lower()
                    mp = parsed.get("max_pages")
                    if isinstance(mp, int) and 1 <= mp <= 10:
                        max_pages = mp
            except json.JSONDecodeError:
                pass
        if not query:
            query = raw.split("\n")[0].strip()
        if not query:
            # Nothing to search for; don't send an empty query to the backend.
            return {"error": "web_search: query is required", "exit_code": 1}

        if time_filter is None:
            # Infer freshness from wording when the caller did not specify it.
            q_lc = query.lower()
            if any(kw in q_lc for kw in ("today", "latest", "breaking", "this morning", "right now", "currently")):
                time_filter = "day"
            elif any(kw in q_lc for kw in ("this week", "past week", "recent news", "last few days")):
                time_filter = "week"
            elif any(kw in q_lc for kw in ("this month", "past month")):
                time_filter = "month"
            elif " news" in q_lc or q_lc.startswith("news "):
                time_filter = "week"

        loop = asyncio.get_running_loop()
        try:
            # The search is blocking, so it runs in the default executor.
            text, sources = await asyncio.wait_for(
                loop.run_in_executor(
                    None,
                    lambda: comprehensive_web_search(
                        query,
                        max_pages=max_pages,
                        time_filter=time_filter,
                        return_sources=True,
                    ),
                ),
                timeout=30,
            )
        except asyncio.TimeoutError:
            # Mirror web_fetch: report failures as tool errors, don't raise.
            return {"error": f"web_search: timed out searching for {query!r}", "exit_code": 1}
        except Exception as e:
            return {"error": f"web_search: {e}", "exit_code": 1}

        output = (text or "")[:MAX_OUTPUT_CHARS]
        if sources:
            output += "\n\n<!-- SOURCES:" + json.dumps(sources) + " -->"
        return {"output": output, "exit_code": 0}
|
||||
|
||||
class WebFetchTool:
    """Agent tool that downloads one web page and returns its readable text."""

    async def execute(self, content: str, ctx: dict) -> dict:
        """Fetch the URL named in *content* (plain text or ``{"url": ...}``).

        A bare domain gets an ``https://`` prefix; only http/https schemes
        are accepted. Returns ``{"output", "exit_code": 0}`` on success and
        ``{"error", "exit_code": 1}`` on any failure.
        """
        from src.search.content import fetch_webpage_content

        stripped = content.strip()
        url = ""
        if stripped.startswith("{"):
            try:
                payload = json.loads(stripped)
            except json.JSONDecodeError:
                payload = None
            if isinstance(payload, dict):
                url = str(payload.get("url") or "").strip()
        if not url:
            # Plain-text input: the first line is the URL.
            url = stripped.split("\n")[0].strip()

        has_whitespace = any(ch in url for ch in (" ", "\t", "\n"))
        if not url or url.startswith("{") or has_whitespace:
            return {"error": "web_fetch: provide a single URL or domain, e.g. example.com", "exit_code": 1}

        lowered = url.lower()
        is_http = lowered.startswith(("http://", "https://"))
        if not is_http:
            if "://" in lowered:
                return {"error": f"web_fetch: unsupported URL scheme (only http/https): {url[:80]}", "exit_code": 1}
            url = "https://" + url

        loop = asyncio.get_running_loop()
        try:
            # The fetch is blocking, so it runs on the default executor.
            result = await asyncio.wait_for(
                loop.run_in_executor(None, lambda: fetch_webpage_content(url, timeout=10)),
                timeout=30,
            )
        except asyncio.TimeoutError:
            return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1}
        except Exception as e:
            return {"error": f"web_fetch: {url}: {e}", "exit_code": 1}

        err = result.get("error")
        text = (result.get("content") or "").strip()
        title = result.get("title") or ""

        if not text:
            reason = err if err else "no readable text content (not HTML, or the page needs JS/login)"
            return {"error": f"web_fetch: {url}: {reason}", "exit_code": 1}

        heading = f"# {title}\n" if title else ""
        output = heading + f"Source: {url}\n\n" + text
        if len(output) > MAX_OUTPUT_CHARS:
            output = output[:MAX_OUTPUT_CHARS] + "\n\n[...truncated]"
        return {"output": output, "exit_code": 0}
|
||||
@@ -24,7 +24,9 @@ MAX_PIPELINE_STEPS = 10
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Global managers (set from app.py, same pattern as _mcp_manager)
|
||||
# ---------------------------------------------------------------------------
|
||||
# _session_manager is kept as a local cache for performance (avoiding
|
||||
# repeated get_session_manager_instance() calls). It's synced with
|
||||
# the authoritative singleton in core.models.
|
||||
_session_manager = None
|
||||
_memory_manager = None
|
||||
_memory_vector = None
|
||||
@@ -33,11 +35,15 @@ _personal_docs_manager = None
|
||||
|
||||
|
||||
def set_session_manager(mgr):
    """Install *mgr* as the session manager.

    Updates the module-level cache first, then forwards the same object to
    ``core.models.set_session_manager_instance``.
    """
    global _session_manager
    _session_manager = mgr

    # Imported here rather than at module top, as in the original.
    from core.models import set_session_manager_instance as _publish
    _publish(mgr)
|
||||
|
||||
|
||||
def get_session_manager():
    """Return the cached session manager (``None`` until one has been set)."""
    return _session_manager
|
||||
|
||||
|
||||
@@ -966,16 +972,15 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
|
||||
memories = [m for m in memories if m.get("category", "").lower() == category_filter]
|
||||
if not memories:
|
||||
return {"results": "No memories found" + (f" in category '{category_filter}'" if category_filter else "") + "."}
|
||||
|
||||
result_lines = [f"Found {len(memories)} memory entries:\n"]
|
||||
for m in memories[:100]:
|
||||
for m in memories:
|
||||
cat = m.get("category", "fact")
|
||||
mid = m.get("id", "?")[:8]
|
||||
text = m.get("text", "")
|
||||
if len(text) > 150:
|
||||
text = text[:150] + "..."
|
||||
result_lines.append(f"- [{cat}] `{mid}` — {text}")
|
||||
if len(memories) > 100:
|
||||
result_lines.append(f"... and {len(memories) - 100} more")
|
||||
return {"results": "\n".join(result_lines)}
|
||||
|
||||
elif action == "add":
|
||||
|
||||
+16
-2
@@ -4,6 +4,8 @@ import logging
|
||||
from typing import Dict
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
|
||||
from core.platform_compat import safe_chmod
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class APIKeyManager:
|
||||
@@ -15,12 +17,20 @@ class APIKeyManager:
|
||||
def get_or_create_key(self) -> bytes:
    """Get or create the encryption key for API keys.

    Returns the bytes stored in ``self.key_file`` when it exists; otherwise
    generates a new Fernet key, writes it to that file and returns it.
    """
    if os.path.exists(self.key_file):
        # Older versions wrote .key with the process umask (often 0o644,
        # i.e. group/world-readable). Re-restrict on read so existing
        # installs heal without needing the key to be regenerated.
        safe_chmod(self.key_file, 0o600)
        with open(self.key_file, 'rb') as f:
            return f.read()

    key = Fernet.generate_key()
    # This key decrypts every stored provider credential. Create the file
    # with owner-only permissions from the start, so it is never readable
    # by group/world between creation and the chmod below.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(self.key_file, flags, 0o600)
    with os.fdopen(fd, 'wb') as f:
        f.write(key)
    # Kept as a second line of defence; a no-op on Windows (files there
    # are ACL-restricted to the user already).
    safe_chmod(self.key_file, 0o600)
    return key
|
||||
|
||||
def encrypt_api_key(self, api_key: str) -> str:
|
||||
@@ -57,7 +67,12 @@ class APIKeyManager:
|
||||
# Legacy/wrong shape (e.g. a list) — .items() would raise. Ignore it.
|
||||
logger.warning("API keys file has unexpected shape (%s); ignoring", type(encrypted_keys).__name__)
|
||||
return {}
|
||||
return encrypted_keys
|
||||
|
||||
return {
|
||||
str(provider): key
|
||||
for provider, key in encrypted_keys.items()
|
||||
if isinstance(key, str)
|
||||
}
|
||||
|
||||
def save(self, provider: str, api_key: str):
|
||||
"""Save encrypted API key to file.
|
||||
@@ -82,4 +97,3 @@ class APIKeyManager:
|
||||
except (InvalidToken, ValueError) as e:
|
||||
logger.warning("Failed to decrypt API key for %s: %s", provider, e)
|
||||
return decrypted
|
||||
|
||||
|
||||
@@ -55,6 +55,8 @@ async def _drain_agent(sess, messages):
|
||||
if "delta" in d:
|
||||
delta = d.get("delta")
|
||||
if isinstance(delta, str):
|
||||
if d.get("thinking"):
|
||||
continue
|
||||
full += delta
|
||||
elif d.get("type") == "agent_step":
|
||||
round_num = d.get("round", round_num)
|
||||
|
||||
+32
-16
@@ -579,6 +579,24 @@ def _classify_event_heuristic(summary: str) -> tuple:
|
||||
return etype, None
|
||||
|
||||
|
||||
def _memory_context_lines(mems, limit: int = 40) -> list:
|
||||
"""Render Memory rows into short personal-context bullets for event classify.
|
||||
|
||||
Reads the Memory ORM `text` column. The previous inline code read a
|
||||
non-existent `content` attribute, so it raised AttributeError on the first
|
||||
row, the surrounding except swallowed it, and the classifier ran with no
|
||||
personal context at all. getattr keeps it robust to future schema drift.
|
||||
"""
|
||||
lines: list = []
|
||||
for m in mems:
|
||||
c = (getattr(m, "text", "") or "").strip()
|
||||
if c:
|
||||
lines.append(f"- {c[:200]}")
|
||||
if len(lines) >= limit:
|
||||
break
|
||||
return lines
|
||||
|
||||
|
||||
async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
|
||||
"""Hybrid classification of upcoming calendar events: fast heuristic for
|
||||
obvious cases, LLM fallback for ambiguous ones. Assigns event_type +
|
||||
@@ -614,16 +632,11 @@ async def action_classify_events(owner: str, **kwargs) -> Tuple[str, bool]:
|
||||
try:
|
||||
from core.database import Memory as _Mem
|
||||
_mems = db.query(_Mem).filter(_Mem.owner == owner).limit(60).all() if owner else []
|
||||
if _mems:
|
||||
_lines = []
|
||||
for m in _mems:
|
||||
c = (m.content or "").strip()
|
||||
if c:
|
||||
_lines.append(f"- {c[:200]}")
|
||||
if _lines:
|
||||
_memory_context = "USER CONTEXT (relationships, work, life):\n" + "\n".join(_lines[:40]) + "\n\n"
|
||||
_lines = _memory_context_lines(_mems)
|
||||
if _lines:
|
||||
_memory_context = "USER CONTEXT (relationships, work, life):\n" + "\n".join(_lines) + "\n\n"
|
||||
except Exception as _me:
|
||||
logger.debug(f"Could not load memory for classify: {_me}")
|
||||
logger.warning(f"Could not load memory for classify: {_me}")
|
||||
|
||||
classified_h = 0
|
||||
classified_llm = 0
|
||||
@@ -796,14 +809,14 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
|
||||
import email as _email_mod
|
||||
import asyncio as _aio
|
||||
from datetime import datetime as _dt, timedelta as _td
|
||||
from routes.email_helpers import _imap_connect, SCHEDULED_DB
|
||||
from routes.email_helpers import _email_cache_owner_clause, _imap_connect, SCHEDULED_DB
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.llm_core import llm_call_async
|
||||
|
||||
# 1. Pull recent UIDs + From headers cheaply (header-only fetch).
|
||||
def _pull_headers():
|
||||
results = []
|
||||
conn = _imap_connect(None)
|
||||
conn = _imap_connect(None, owner=owner)
|
||||
try:
|
||||
conn.select("INBOX", readonly=True)
|
||||
status, data = conn.search(None, "ALL")
|
||||
@@ -855,9 +868,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
|
||||
# 3. Eligibility: ≥3 emails AND (no cache OR cache > 30 days old).
|
||||
try:
|
||||
conn = _sql3.connect(SCHEDULED_DB)
|
||||
owner_clause, owner_params = _email_cache_owner_clause(owner)
|
||||
cached = {
|
||||
r[0]: r[1] for r in conn.execute(
|
||||
"SELECT from_address, last_built_at FROM sender_signatures"
|
||||
f"SELECT from_address, last_built_at FROM sender_signatures WHERE {owner_clause}",
|
||||
owner_params,
|
||||
).fetchall()
|
||||
}
|
||||
conn.close()
|
||||
@@ -888,7 +903,7 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
|
||||
|
||||
def _fetch_bodies(_msgs):
|
||||
bodies = []
|
||||
conn2 = _imap_connect(None)
|
||||
conn2 = _imap_connect(None, owner=owner)
|
||||
try:
|
||||
conn2.select("INBOX", readonly=True)
|
||||
for mm in _msgs:
|
||||
@@ -965,11 +980,12 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
|
||||
|
||||
try:
|
||||
conn = _sql3.connect(SCHEDULED_DB)
|
||||
owner_value = (owner or "").strip()
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO sender_signatures "
|
||||
"(from_address, signature_text, sample_count, last_built_at, model_used, source) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?)",
|
||||
(addr, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
|
||||
"(from_address, owner, signature_text, sample_count, last_built_at, model_used, source) "
|
||||
"VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||
(addr, owner_value, cached_sig, len(bodies), _dt.utcnow().isoformat(), model, "llm"),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
+84
-6
@@ -5,11 +5,13 @@ Auto-registration of built-in MCP servers on startup.
|
||||
Each server runs as a stdio subprocess managed by McpManager.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import asyncio
|
||||
|
||||
from core.platform_compat import IS_WINDOWS, which_tool
|
||||
|
||||
@@ -196,18 +198,29 @@ def _npx_package_from_args(args):
|
||||
async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
||||
"""Probe whether an npx package is already in the local cache.
|
||||
|
||||
Runs `npx --no-install <pkg> --version`. --no-install tells npx to
|
||||
fail instead of downloading, so a cache miss returns fast. We treat
|
||||
"exited 0 with non-empty stdout" as proof of a working cached copy.
|
||||
Anything else (non-zero exit, empty stdout, timeout, missing npx,
|
||||
network error) means we should skip the server.
|
||||
First checks the local `_npx` cache for an installed package. If the
|
||||
package is not found there, falls back to `npx --no-install <pkg>
|
||||
--version` so older npm layouts still work without downloading.
|
||||
"""
|
||||
if _is_package_in_npx_cache(package_spec):
|
||||
return True
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
npx_path, "--no-install", package_spec, "--version",
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
except NotImplementedError:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[npx_path, "--no-install", package_spec, "--version"],
|
||||
capture_output=True,
|
||||
timeout=timeout_s,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, OSError, ValueError):
|
||||
return False
|
||||
return result.returncode == 0 and bool(result.stdout.strip())
|
||||
except (OSError, ValueError):
|
||||
return False
|
||||
try:
|
||||
@@ -220,3 +233,68 @@ async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
||||
pass
|
||||
return False
|
||||
return proc.returncode == 0 and bool(stdout.strip())
|
||||
|
||||
|
||||
def _is_package_in_npx_cache(package_spec):
    """Report whether any known npm cache has package_spec under ``_npx``."""
    name = _npx_package_name(package_spec)
    if not name:
        return False
    return any(
        _npx_cache_contains_package(os.path.join(root, "_npx"), name)
        for root in _npm_cache_roots()
    )
|
||||
|
||||
|
||||
def _npx_package_name(package_spec):
|
||||
"""Strip a version/range suffix from an npm package spec."""
|
||||
if not package_spec:
|
||||
return ""
|
||||
if package_spec.startswith("@"):
|
||||
parts = package_spec.split("@", 2)
|
||||
if len(parts) >= 3:
|
||||
return f"@{parts[1]}"
|
||||
return package_spec
|
||||
return package_spec.split("@", 1)[0]
|
||||
|
||||
|
||||
def _npm_cache_roots():
|
||||
roots = []
|
||||
configured = os.environ.get("npm_config_cache")
|
||||
if configured:
|
||||
roots.append(os.path.expanduser(configured))
|
||||
roots.append(os.path.join(os.path.expanduser("~"), ".npm"))
|
||||
local_app_data = os.environ.get("LOCALAPPDATA")
|
||||
if local_app_data:
|
||||
roots.append(os.path.join(local_app_data, "npm-cache"))
|
||||
return list(dict.fromkeys(roots))
|
||||
|
||||
|
||||
def _npx_cache_contains_package(npx_root, package_name):
|
||||
if not os.path.isdir(npx_root):
|
||||
return False
|
||||
package_path = os.path.join("node_modules", *package_name.split("/"), "package.json")
|
||||
try:
|
||||
entries = list(os.scandir(npx_root))
|
||||
except OSError:
|
||||
return False
|
||||
for entry in entries:
|
||||
try:
|
||||
is_dir = entry.is_dir()
|
||||
except OSError:
|
||||
continue
|
||||
cached_name = _cached_package_name(os.path.join(entry.path, package_path))
|
||||
if is_dir and cached_name == package_name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _cached_package_name(package_json_path):
|
||||
try:
|
||||
with open(package_json_path, encoding="utf-8") as fh:
|
||||
data = json.load(fh)
|
||||
except (OSError, ValueError):
|
||||
return ""
|
||||
return str(data.get("name", "")).strip()
|
||||
|
||||
+178
-1
@@ -128,6 +128,17 @@ def validate_caldav_url(raw_url: str) -> str:
|
||||
return urlunparse(parsed._replace(fragment="")).rstrip("/")
|
||||
|
||||
|
||||
def _event_etag(obj) -> str:
|
||||
"""Best-effort ETag extraction from python-caldav resources."""
|
||||
try:
|
||||
etag = getattr(obj, "etag", None)
|
||||
if callable(etag):
|
||||
etag = etag()
|
||||
return str(etag or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _stable_cal_id(remote_url: str, owner: str = "", account_id: str = "") -> str:
|
||||
"""Deterministic local id for a remote CalDAV calendar, scoped to owner
|
||||
and account so two users — or one user with two accounts — pointing at
|
||||
@@ -316,11 +327,12 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
color="#5b8abf",
|
||||
source="caldav",
|
||||
account_id=account_id or None,
|
||||
caldav_base_url=remote_url,
|
||||
)
|
||||
db.add(local_cal)
|
||||
db.commit()
|
||||
else:
|
||||
# Refresh display name and stamp account_id if missing.
|
||||
# Refresh display name and stamp CalDAV metadata if missing.
|
||||
changed = False
|
||||
if local_cal.name != display_name:
|
||||
local_cal.name = display_name
|
||||
@@ -328,6 +340,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
if account_id and not local_cal.account_id:
|
||||
local_cal.account_id = account_id
|
||||
changed = True
|
||||
if local_cal.caldav_base_url != remote_url:
|
||||
local_cal.caldav_base_url = remote_url
|
||||
changed = True
|
||||
if changed:
|
||||
db.commit()
|
||||
result["calendars"] += 1
|
||||
@@ -395,6 +410,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
|
||||
existing = _find_existing_event(db, pending, uid_val, local_cal.id)
|
||||
if existing:
|
||||
if existing.caldav_sync_pending in {"create", "update"}:
|
||||
result["events"] += 1
|
||||
continue
|
||||
existing.calendar_id = local_cal.id
|
||||
existing.summary = summary
|
||||
existing.description = description
|
||||
@@ -405,6 +423,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
existing.is_utc = row_is_utc
|
||||
existing.rrule = rrule
|
||||
existing.origin = "caldav"
|
||||
existing.remote_href = str(getattr(obj, "url", "") or "") or None
|
||||
existing.remote_etag = _event_etag(obj) or None
|
||||
existing.caldav_sync_pending = None
|
||||
else:
|
||||
new_ev = CalendarEvent(
|
||||
uid=uid_val,
|
||||
@@ -418,6 +439,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
is_utc=row_is_utc,
|
||||
rrule=rrule,
|
||||
origin="caldav",
|
||||
remote_href=str(getattr(obj, "url", "") or "") or None,
|
||||
remote_etag=_event_etag(obj) or None,
|
||||
)
|
||||
db.add(new_ev)
|
||||
pending[uid_val] = new_ev
|
||||
@@ -442,6 +465,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
CalendarEvent.origin == "caldav",
|
||||
CalendarEvent.dtstart >= start,
|
||||
CalendarEvent.dtstart <= end,
|
||||
CalendarEvent.remote_href.isnot(None),
|
||||
CalendarEvent.caldav_sync_pending.is_(None),
|
||||
~CalendarEvent.uid.in_(seen_uids) if seen_uids else CalendarEvent.uid.isnot(None),
|
||||
).all()
|
||||
for ev in stale:
|
||||
@@ -458,6 +483,92 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
return result
|
||||
|
||||
|
||||
def _event_payload(ev) -> dict:
|
||||
return {
|
||||
"uid": ev.uid,
|
||||
"summary": ev.summary,
|
||||
"description": ev.description,
|
||||
"location": ev.location,
|
||||
"dtstart": ev.dtstart,
|
||||
"dtend": ev.dtend,
|
||||
"all_day": ev.all_day,
|
||||
"is_utc": ev.is_utc,
|
||||
"rrule": ev.rrule or "",
|
||||
}
|
||||
|
||||
|
||||
def _load_event_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
    """Look up the owner's event *uid* and package it for a remote push.

    Returns ``(calendar source, calendar id, event payload)``, or ``None``
    when no such event exists or its calendar's source is not "caldav".
    """
    from core.database import CalendarCal, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        query = (
            session.query(CalendarEvent)
            .join(CalendarCal)
            .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
        )
        event = query.first()
        if not event:
            return None
        calendar = event.calendar
        if not calendar or calendar.source != "caldav":
            return None
        return calendar.source, calendar.id, _event_payload(event)
    finally:
        session.close()
|
||||
|
||||
|
||||
def _load_delete_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
    """Resolve the calendar a remote delete of *uid* should target.

    A deletion tombstone for (uid, owner) is checked first; failing that,
    the event row itself is looked up. Returns ``(source, calendar id,
    {"uid": uid})``, or ``None`` when neither exists or the event's
    calendar source is not "caldav".
    """
    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        payload = {"uid": uid}

        tombstone = (
            session.query(CalendarDeletedEvent)
            .filter(
                CalendarDeletedEvent.uid == uid,
                CalendarDeletedEvent.owner == owner,
            )
            .first()
        )
        if tombstone:
            return "caldav", tombstone.calendar_id, payload

        event = (
            session.query(CalendarEvent)
            .join(CalendarCal)
            .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
            .first()
        )
        if not event:
            return None
        calendar = event.calendar
        if not calendar or calendar.source != "caldav":
            return None
        return calendar.source, calendar.id, payload
    finally:
        session.close()
|
||||
|
||||
|
||||
def _pending_writeback_uids(owner: str) -> tuple[list[str], list[str]]:
    """Collect the uids that still need to be pushed for *owner*.

    Returns ``(event uids, delete uids)``. Event uids are the non-cancelled
    events on the owner's "caldav" calendars that either carry a
    ``caldav_sync_pending`` marker or have no ``remote_href``. Delete uids
    are those of every deletion tombstone belonging to the owner.
    """
    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        needs_push = (
            (CalendarEvent.caldav_sync_pending.isnot(None))
            | (CalendarEvent.remote_href.is_(None))
        )
        event_rows = (
            session.query(CalendarEvent.uid)
            .join(CalendarCal)
            .filter(
                CalendarCal.owner == owner,
                CalendarCal.source == "caldav",
                CalendarEvent.status != "cancelled",
                needs_push,
            )
            .all()
        )
        tombstone_rows = (
            session.query(CalendarDeletedEvent.uid)
            .filter(CalendarDeletedEvent.owner == owner)
            .all()
        )
        event_uids = [row[0] for row in event_rows]
        deleted_uids = [row[0] for row in tombstone_rows]
        return event_uids, deleted_uids
    finally:
        session.close()
|
||||
|
||||
|
||||
def _load_caldav_accounts(owner: str) -> list:
|
||||
"""Return the list of CalDAV accounts for *owner*, auto-migrating the legacy
|
||||
single-account ``caldav`` key to the new ``caldav_accounts`` list on first call.
|
||||
@@ -533,3 +644,69 @@ async def sync_caldav(owner: str) -> dict:
|
||||
for err in result.get("errors", []):
|
||||
totals["errors"].append(f"{label}: {err}")
|
||||
return totals
|
||||
|
||||
|
||||
async def push_event_create(owner: str, uid: str) -> dict:
    """Push the owner's local event *uid* to its remote CalDAV calendar.

    Returns ``{"ok": True, "skipped": True}`` when the event cannot be
    loaded for write-back; otherwise returns the write-back result.
    """
    target = _load_event_for_writeback(owner, uid)
    if not target:
        return {"ok": True, "skipped": True}

    from src.caldav_writeback import writeback_event

    # target is (source, calendar_id, payload), in writeback_event's order.
    return await writeback_event(owner, *target)
|
||||
|
||||
|
||||
async def push_event_update(owner: str, uid: str) -> dict:
    """Push an updated event; delegates to ``push_event_create`` unchanged."""
    outcome = await push_event_create(owner, uid)
    return outcome
|
||||
|
||||
|
||||
async def push_event_delete(owner: str, uid: str) -> dict:
    """Delete the owner's event *uid* on its remote CalDAV calendar.

    Returns ``{"ok": True, "skipped": True}`` when no delete target can be
    resolved; otherwise returns the write-back result.
    """
    target = _load_delete_for_writeback(owner, uid)
    if not target:
        return {"ok": True, "skipped": True}

    from src.caldav_writeback import writeback_event

    # target is (source, calendar_id, payload), in writeback_event's order.
    return await writeback_event(owner, *target, delete=True)
|
||||
|
||||
|
||||
async def push_pending_events(owner: str) -> dict:
    """Push every pending local change for *owner* to the remote server.

    Pending creates/updates are pushed first, then pending deletes. Returns
    ``{"events": <number applied>, "errors": [<"uid: message">, ...]}``.
    Results flagged ``skipped`` are neither counted nor reported as errors.
    """
    result = {"events": 0, "errors": []}
    uids, delete_uids = _pending_writeback_uids(owner)
    await _push_pending_batch(owner, uids, push_event_update, "push", result)
    await _push_pending_batch(owner, delete_uids, push_event_delete, "delete", result)
    return result


async def _push_pending_batch(owner: str, uids, push_one, label: str, result: dict) -> None:
    """Run ``push_one(owner, uid)`` for each uid and tally into *result*."""
    for event_uid in uids:
        try:
            out = await push_one(owner, event_uid)
            if out.get("skipped"):
                # Nothing was sent to the server, so this is neither a
                # pushed event nor a failure.
                continue
            if out.get("ok"):
                result["events"] += 1
            else:
                result["errors"].append(f"{event_uid}: {str(out.get('error') or out)[:160]}")
        except Exception as e:
            # One bad event must not abort the rest of the batch.
            logger.warning("CalDAV pending %s failed for uid=%s: %s", label, event_uid, e)
            result["errors"].append(f"{event_uid}: {str(e)[:160]}")
|
||||
|
||||
|
||||
async def sync_caldav_direction(owner: str, direction: str = "pull") -> dict:
    """Run a CalDAV sync for *owner* in the requested direction.

    ``direction`` is trimmed and lower-cased; an empty value means "pull".
    "pull" and "push" return that operation's result. "both" pushes first,
    then pulls, and returns ``{"push": ..., "pull": ...}``. Any other value
    returns a zeroed result with a single error message.
    """
    mode = (direction or "pull").strip().lower()

    if mode not in ("pull", "push", "both"):
        return {
            "calendars": 0,
            "events": 0,
            "deleted": 0,
            "errors": [f"Unsupported CalDAV sync direction: {mode}"],
        }

    if mode == "pull":
        return await sync_caldav(owner)

    pushed = await push_pending_events(owner)
    if mode == "push":
        return pushed

    pulled = await sync_caldav(owner)
    return {"push": pushed, "pull": pulled}
|
||||
|
||||
+92
-6
@@ -89,6 +89,23 @@ def find_remote_calendar(calendars, local_cal_id: str, owner: str = "", account_
|
||||
return None
|
||||
|
||||
|
||||
def _resource_href(obj) -> str:
|
||||
try:
|
||||
return str(getattr(obj, "url", "") or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _resource_etag(obj) -> str:
|
||||
try:
|
||||
etag = getattr(obj, "etag", None)
|
||||
if callable(etag):
|
||||
etag = etag()
|
||||
return str(etag or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
owner: str = "", account_id: str = "") -> dict:
|
||||
"""Create/update (or delete) ``ev`` on the matching remote calendar.
|
||||
@@ -105,6 +122,7 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
remote = find_remote_calendar(calendars, local_cal_id, owner=owner, account_id=account_id)
|
||||
if remote is None:
|
||||
return {"ok": False, "error": "remote calendar not found"}
|
||||
remote_url = str(getattr(remote, "url", "") or "")
|
||||
|
||||
try:
|
||||
existing = remote.event_by_uid(uid)
|
||||
@@ -113,17 +131,34 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
|
||||
if delete:
|
||||
if existing is None:
|
||||
return {"ok": True, "note": "already absent on remote"}
|
||||
return {"ok": True, "note": "already absent on remote", "calendar_url": remote_url}
|
||||
existing.delete()
|
||||
return {"ok": True}
|
||||
return {
|
||||
"ok": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(existing),
|
||||
"remote_etag": _resource_etag(existing),
|
||||
}
|
||||
|
||||
ical = build_event_ical(ev)
|
||||
if existing is not None:
|
||||
existing.data = ical
|
||||
existing.save()
|
||||
return {"ok": True, "updated": True}
|
||||
remote.save_event(ical)
|
||||
return {"ok": True, "created": True}
|
||||
return {
|
||||
"ok": True,
|
||||
"updated": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(existing),
|
||||
"remote_etag": _resource_etag(existing),
|
||||
}
|
||||
created = remote.save_event(ical)
|
||||
return {
|
||||
"ok": True,
|
||||
"created": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(created),
|
||||
"remote_etag": _resource_etag(created),
|
||||
}
|
||||
|
||||
|
||||
def _discover_calendars(client):
|
||||
@@ -154,6 +189,54 @@ def _writeback_blocking(local_cal_id, ev, delete, url, username, password,
|
||||
owner=owner, account_id=account_id)
|
||||
|
||||
|
||||
def _persist_writeback_result(owner: str, calendar_id: str, uid: str, result: dict, *, delete: bool) -> None:
    """Record the outcome of a remote write-back on the local rows.

    - Stores ``result["calendar_url"]`` on the owner's calendar when present.
    - For deletes: removes the tombstone on success, otherwise stores the
      error text (capped at 500 characters) on it.
    - For creates/updates: on success, stores the remote href/etag when
      provided and clears the event's pending marker.

    Does nothing without a uid or when *result* is not a dict. Database
    errors are rolled back and logged, never raised.
    """
    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    if not uid or not isinstance(result, dict):
        return

    succeeded = result.get("ok")
    session = SessionLocal()
    try:
        calendar = (
            session.query(CalendarCal)
            .filter(CalendarCal.id == calendar_id, CalendarCal.owner == owner)
            .first()
        )
        calendar_url = result.get("calendar_url")
        if calendar and calendar_url:
            calendar.caldav_base_url = calendar_url

        if delete:
            tombstone = (
                session.query(CalendarDeletedEvent)
                .filter(
                    CalendarDeletedEvent.uid == uid,
                    CalendarDeletedEvent.owner == owner,
                )
                .first()
            )
            if tombstone:
                if succeeded:
                    session.delete(tombstone)
                else:
                    tombstone.last_error = str(result.get("error") or result)[:500]
        else:
            event = (
                session.query(CalendarEvent)
                .join(CalendarCal)
                .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
                .first()
            )
            if event and succeeded:
                href = result.get("remote_href")
                etag = result.get("remote_etag")
                if href:
                    event.remote_href = href
                if etag:
                    event.remote_etag = etag
                event.caldav_sync_pending = None

        session.commit()
    except Exception:
        session.rollback()
        logger.exception("CalDAV write-back metadata persistence failed")
    finally:
        session.close()
|
||||
|
||||
|
||||
async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
|
||||
ev: dict, *, delete: bool = False) -> dict:
|
||||
"""Best-effort push of a local change to the remote CalDAV server.
|
||||
@@ -204,9 +287,12 @@ async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
|
||||
result = await asyncio.to_thread(
|
||||
_writeback_blocking, calendar_id, ev, delete, url, user, pw, owner, acc_id
|
||||
)
|
||||
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
|
||||
if not result.get("ok"):
|
||||
logger.warning("CalDAV write-back did not apply: %s", result.get("error") or result)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.exception("CalDAV write-back raised")
|
||||
return {"ok": False, "error": str(e)[:200]}
|
||||
result = {"ok": False, "error": str(e)[:200]}
|
||||
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
|
||||
return result
|
||||
|
||||
+13
-9
@@ -175,6 +175,19 @@ class ChatProcessor:
|
||||
|
||||
Returns:
|
||||
Tuple of (preface messages, rag_sources list)
|
||||
|
||||
Note on KV-cache friendliness: the ``system``-role messages assembled
|
||||
here are later concatenated into a single system message and sent as
|
||||
the very first thing in the payload (see ``llm_core``'s "consolidate
|
||||
system messages" step). Local OpenAI-compatible backends (llama.cpp /
|
||||
LM Studio) key their KV cache off the byte-identical token prefix, so
|
||||
*anything* that changes turn-to-turn — timestamps, retrieved snippets,
|
||||
per-turn counts — must NOT be folded into a system message here. Such
|
||||
content belongs in a separate ``user``/context message appended near
|
||||
the end of the array (see ``current_datetime_context_message`` and
|
||||
``untrusted_context_message`` callers in ``build_chat_context``),
|
||||
which keeps the static system prefix byte-identical across turns of
|
||||
the same session and lets the backend reuse its cached prefix.
|
||||
"""
|
||||
preface = []
|
||||
rag_sources = []
|
||||
@@ -185,15 +198,6 @@ class ChatProcessor:
|
||||
"role": "system",
|
||||
"content": preset_system_prompt
|
||||
})
|
||||
if not agent_mode:
|
||||
try:
|
||||
from src.user_time import current_datetime_prompt
|
||||
preface.append({
|
||||
"role": "system",
|
||||
"content": current_datetime_prompt(),
|
||||
})
|
||||
except Exception:
|
||||
logger.debug("Failed to add current date/time context", exc_info=True)
|
||||
preface.append({
|
||||
"role": "system",
|
||||
"content": UNTRUSTED_CONTEXT_POLICY,
|
||||
|
||||
+27
-7
@@ -31,16 +31,22 @@ def compute_input_token_budget(
|
||||
|
||||
Args:
|
||||
configured: the value read from settings (may be the default).
|
||||
context_length: the model's discovered context window (0/unknown if none).
|
||||
explicit: True if the user explicitly set ``agent_input_token_budget``.
|
||||
context_length: the model's discovered context window. Pass 0 when the
|
||||
window is unknown / only a bare fallback — auto-scaling then stays
|
||||
conservative instead of trusting an unproven window (review on #4122).
|
||||
explicit: True if the user set a NON-default budget. The default value is
|
||||
the "auto" sentinel (scale to the window); any other value is an
|
||||
explicit cap. (A deliberately-chosen default can't be distinguished
|
||||
from a materialized default by value, so the default reads as auto.)
|
||||
|
||||
Rules:
|
||||
- Explicit user budget is honoured exactly, only clamped to the model's
|
||||
window when that window is known (never send more than the model holds).
|
||||
- Otherwise (default), scale to ``headroom`` of the context window, capped
|
||||
at ``hard_max`` — so long-context models use their capacity.
|
||||
- When the window is unknown, fall back to the configured/default value
|
||||
(preserving the previous behaviour).
|
||||
window when that window is known (the user's deliberate choice wins;
|
||||
``hard_max`` is an auto-budget ceiling only — see #1230).
|
||||
- Otherwise (auto), scale to ``headroom`` of the context window, capped at
|
||||
``hard_max`` — so long-context models use their capacity.
|
||||
- When the window is unknown (context_length <= 0), use the conservative
|
||||
``default`` budget and do NOT scale off the fallback.
|
||||
"""
|
||||
configured = int(configured or 0)
|
||||
context_length = int(context_length or 0)
|
||||
@@ -53,3 +59,17 @@ def compute_input_token_budget(
|
||||
return max(1, min(scaled, hard_max))
|
||||
|
||||
return configured if configured > 0 else default
|
||||
|
||||
|
||||
def budget_is_explicit(configured: int, *, default: int = DEFAULT_BUDGET) -> bool:
    """Report whether ``configured`` is a deliberate, user-chosen budget cap.

    The default budget doubles as the "auto" sentinel (scale to the model's
    window), so a value only counts as explicit when it is positive AND differs
    from ``default``. The decision is made on the VALUE rather than on whether
    the setting is present: the settings-save path writes every default into
    settings.json, so a persisted default must keep reading as auto (the
    regression tracked in #4121 / #1230). Keeping the rule in one helper makes
    that contract unit-testable.

    Args:
        configured: The ``agent_input_token_budget`` value read from settings;
            falsy values (``None``, ``0``) are treated as 0.
        default: The sentinel value that means "auto".

    Returns:
        True only for a positive, non-default budget.
    """
    value = int(configured or 0)
    if value <= 0:
        return False
    return value != default
|
||||
|
||||
@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
|
||||
protected_tokens = estimate_tokens(protected_msgs)
|
||||
budget -= protected_tokens
|
||||
|
||||
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
|
||||
essential_system = system_msgs[:1] if system_msgs else []
|
||||
extra_system = system_msgs[1:]
|
||||
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
|
||||
# Exception: a research-spinoff primer (the seeded report that grounds a
|
||||
# "Discuss" chat) must never be dropped — it is the conversation's whole
|
||||
# knowledge base. Treat any system message carrying research_spinoff_from
|
||||
# metadata as essential alongside the leading system prompt.
|
||||
def _is_research_primer(m):
|
||||
return bool((m.get("metadata") or {}).get("research_spinoff_from"))
|
||||
_primers = [m for m in system_msgs if _is_research_primer(m)]
|
||||
_non_primer = [m for m in system_msgs if not _is_research_primer(m)]
|
||||
essential_system = (_non_primer[:1] if _non_primer else []) + _primers
|
||||
extra_system = _non_primer[1:]
|
||||
|
||||
# Try dropping extra system messages one by one (from the end)
|
||||
trimmed = essential_system + convo_msgs
|
||||
@@ -438,8 +446,8 @@ def _update_session_history(session, split_point: int, summary: str,
|
||||
)
|
||||
new_history = system_prefix + [summary_msg] + recent_history
|
||||
try:
|
||||
from core import models as _core_models
|
||||
manager = getattr(_core_models, "_session_manager", None)
|
||||
from core.models import get_session_manager_instance
|
||||
manager = get_session_manager_instance()
|
||||
except Exception:
|
||||
manager = None
|
||||
if manager and getattr(session, "id", None):
|
||||
|
||||
@@ -136,7 +136,8 @@ async def _tick() -> None:
|
||||
return
|
||||
try:
|
||||
state = json.loads(state_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.warning("cookbook_serve_lifecycle: state file unreadable (%s), skipping tick", e)
|
||||
return
|
||||
tasks = state.get("tasks") or []
|
||||
now_ms = int(time.time() * 1000)
|
||||
@@ -178,8 +179,26 @@ async def _tick() -> None:
|
||||
if stopped_any:
|
||||
try:
|
||||
from core.atomic_io import atomic_write_json
|
||||
state["tasks"] = tasks
|
||||
atomic_write_json(state_path, state)
|
||||
# Re-read the state file so concurrent UI writes (task adds,
|
||||
# status flips, config edits) are not silently overwritten.
|
||||
# Apply only our stop mutations to the fresh snapshot.
|
||||
try:
|
||||
fresh = json.loads(state_path.read_text(encoding="utf-8"))
|
||||
fresh_tasks = fresh.get("tasks") or []
|
||||
except Exception:
|
||||
fresh = state
|
||||
fresh_tasks = tasks
|
||||
stopped_sids = {sid for sid, _, _ in to_stop}
|
||||
for ft in fresh_tasks:
|
||||
if not isinstance(ft, dict):
|
||||
continue
|
||||
ft_sid = ft.get("sessionId") or ft.get("id")
|
||||
if ft_sid in stopped_sids:
|
||||
ft["status"] = "stopped"
|
||||
ft["_scheduledStopAtMs"] = None
|
||||
ft["_lastStatusFlipAt"] = now_ms
|
||||
fresh["tasks"] = fresh_tasks
|
||||
atomic_write_json(state_path, fresh)
|
||||
except Exception as e:
|
||||
logger.warning(f"cookbook_serve_lifecycle: state write failed: {e}")
|
||||
|
||||
|
||||
@@ -232,6 +232,7 @@ class DeepResearcher:
|
||||
self._start_time: float = 0
|
||||
self.queries_used: Set[str] = set()
|
||||
self.urls_fetched: Set[str] = set()
|
||||
self.analyzed_urls: List[Dict[str, str]] = []
|
||||
self.round_count: int = 0
|
||||
# Track which search providers actually returned results during the
|
||||
# run, in arrival order — surfaced in the visual report so users can
|
||||
@@ -525,6 +526,10 @@ class DeepResearcher:
|
||||
if url and url not in self.urls_fetched:
|
||||
urls_to_fetch.append(r)
|
||||
self.urls_fetched.add(url)
|
||||
self.analyzed_urls.append({
|
||||
"url": url,
|
||||
"title": r.get("title", "") or url,
|
||||
})
|
||||
if len(urls_to_fetch) >= self.max_urls_per_round * len(queries):
|
||||
break
|
||||
|
||||
|
||||
+11
-2
@@ -196,13 +196,22 @@ def _get_or_reset_collection(chroma_client, name: str, metadata: Dict[str, Any],
|
||||
try:
|
||||
chroma_client.delete_collection(name)
|
||||
restored = chroma_client.get_or_create_collection(name=name, metadata=current)
|
||||
old_embeddings = preserved.get("embeddings") or []
|
||||
if ids and docs and old_embeddings:
|
||||
# chromadb returns embeddings as a numpy ndarray, whose truth value
|
||||
# is ambiguous — `preserved.get("embeddings") or []` and a bare
|
||||
# `if ... and old_embeddings:` both raise ValueError, which aborts
|
||||
# the restore and loses the rows the reset was supposed to keep.
|
||||
# Use explicit None/len checks instead.
|
||||
old_embeddings = preserved.get("embeddings")
|
||||
if old_embeddings is None:
|
||||
old_embeddings = []
|
||||
if ids and docs and len(old_embeddings):
|
||||
for start in range(0, len(ids), 100):
|
||||
batch_ids = ids[start:start + 100]
|
||||
batch_docs = docs[start:start + 100]
|
||||
batch_metas = metas[start:start + 100]
|
||||
batch_embeddings = old_embeddings[start:start + 100]
|
||||
if hasattr(batch_embeddings, "tolist"):
|
||||
batch_embeddings = batch_embeddings.tolist()
|
||||
if len(batch_metas) < len(batch_ids):
|
||||
batch_metas += [{}] * (len(batch_ids) - len(batch_metas))
|
||||
restored.add(
|
||||
|
||||
+27
-14
@@ -12,7 +12,7 @@ from typing import Optional, Tuple, Dict
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from core.database import SessionLocal, ModelEndpoint
|
||||
from src.llm_core import _detect_provider, _host_match, _ollama_api_root
|
||||
from src.llm_core import _detect_provider, _host_match, _is_kimi_code_url, KIMI_CODE_USER_AGENT, _ollama_api_root
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
|
||||
|
||||
|
||||
def build_models_url(base: str) -> Optional[str]:
|
||||
"""Return the provider-specific model-list endpoint URL for a base."""
|
||||
"""Return the provider-specific model-list endpoint URL for a base.
|
||||
|
||||
For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
|
||||
text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
|
||||
When the user-supplied base has no path — e.g. ``http://localhost:1234`` —
|
||||
we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
|
||||
segment only when the path is empty, leaving any explicit non-empty path
|
||||
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
|
||||
their semantics).
|
||||
"""
|
||||
base = normalize_base(resolve_url(base))
|
||||
provider = _detect_provider(base)
|
||||
if provider == "anthropic":
|
||||
@@ -192,6 +201,12 @@ def build_models_url(base: str) -> Optional[str]:
|
||||
return _ollama_api_root(base) + "/tags"
|
||||
if provider == "chatgpt-subscription":
|
||||
return None
|
||||
# Generic OpenAI-compatible fallback: ensure the path lands on /v1/models
|
||||
# when the user omitted a path entirely. If a non-empty path is already
|
||||
# present (e.g. /openai, /api/openai/v1, /v1), trust the caller — the
|
||||
# /models suffix is appended as-is and the caller's prefix is preserved.
|
||||
if not urlparse(base).path:
|
||||
base = base + "/v1"
|
||||
return base + "/models"
|
||||
|
||||
|
||||
@@ -215,6 +230,8 @@ def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
|
||||
if provider == "openrouter":
|
||||
headers.setdefault("HTTP-Referer", "https://github.com/pewdiepie-archdaemon/odysseus")
|
||||
headers.setdefault("X-OpenRouter-Title", "Odysseus")
|
||||
if _is_kimi_code_url(base):
|
||||
headers.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
|
||||
return headers
|
||||
|
||||
|
||||
@@ -250,27 +267,23 @@ def resolve_endpoint(
|
||||
ep_id = _stg(f"{setting_prefix}_endpoint_id")
|
||||
model = _stg(f"{setting_prefix}_model")
|
||||
|
||||
# If the specific endpoint is not configured, but the caller provided a
|
||||
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
||||
if not ep_id and setting_prefix not in ("utility", "default"):
|
||||
ep_id = _stg("utility_endpoint_id")
|
||||
model = _stg("utility_model")
|
||||
|
||||
# If the endpoint is STILL not configured, but the caller provided a
|
||||
# valid fallback (e.g. the active session model), use that immediately.
|
||||
# This prevents background tasks from jumping to the global default_model
|
||||
# when the user is mid-conversation with a different model.
|
||||
if not ep_id and fallback_url and fallback_model:
|
||||
return fallback_url, fallback_model, fallback_headers
|
||||
|
||||
# Unset Utility means "same as Default Chat Model".
|
||||
if setting_prefix == "utility" and not ep_id:
|
||||
# Unset Utility (or anything else that didn't have a fallback) means "same as Default Chat Model".
|
||||
if not ep_id:
|
||||
ep_id = _stg("default_endpoint_id")
|
||||
model = _stg("default_model")
|
||||
|
||||
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
||||
# If Utility itself is unset, the block above makes that resolve to Default Chat.
|
||||
if not ep_id and setting_prefix != "utility":
|
||||
ep_id = _stg("utility_endpoint_id")
|
||||
model = _stg("utility_model")
|
||||
if not ep_id:
|
||||
ep_id = _stg("default_endpoint_id")
|
||||
model = _stg("default_model")
|
||||
|
||||
if not ep_id:
|
||||
return fallback_url, fallback_model, fallback_headers
|
||||
|
||||
|
||||
+79
-5
@@ -6,6 +6,7 @@ import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException
|
||||
|
||||
from core.atomic_io import atomic_write_json
|
||||
from core.platform_compat import safe_chmod
|
||||
@@ -258,6 +259,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
integration.setdefault("name", "")
|
||||
integration.setdefault("base_url", "")
|
||||
|
||||
if not isinstance(integration.get("name"), str) or not integration["name"].strip():
|
||||
raise HTTPException(400, "Integration name is required")
|
||||
if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
|
||||
raise HTTPException(400, "Integration base URL is required")
|
||||
|
||||
integrations = load_integrations()
|
||||
integrations.append(integration)
|
||||
save_integrations(integrations)
|
||||
@@ -266,6 +272,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Update fields on an existing integration. Returns updated integration or None."""
|
||||
if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
|
||||
raise HTTPException(400, "Integration name is required")
|
||||
if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
|
||||
raise HTTPException(400, "Integration base URL is required")
|
||||
|
||||
integrations = load_integrations()
|
||||
for item in integrations:
|
||||
if item.get("id") == integration_id:
|
||||
@@ -411,17 +422,80 @@ async def execute_api_call(
|
||||
if "application/json" in content_type:
|
||||
try:
|
||||
data = response.json()
|
||||
formatted = json.dumps(data, indent=2, ensure_ascii=False)
|
||||
full = json.dumps(data, indent=2, ensure_ascii=False)
|
||||
if len(full) > 12000:
|
||||
if isinstance(data, list):
|
||||
# Binary-search for the largest prefix such that the
|
||||
# final array (prefix + sentinel) fits within the limit.
|
||||
# Pre-compute the sentinel so we know its serialized size.
|
||||
sentinel_placeholder = {
|
||||
"_truncated": True,
|
||||
"total_items": len(data),
|
||||
"shown_items": 0,
|
||||
}
|
||||
# Overhead: the sentinel appears as an extra array element.
|
||||
# Add a conservative padding for the separating comma,
|
||||
# newline, and indentation characters (~6 chars).
|
||||
sentinel_overhead = len(
|
||||
json.dumps(sentinel_placeholder, indent=2, ensure_ascii=False)
|
||||
) + 6
|
||||
budget = 12000 - sentinel_overhead
|
||||
lo, hi = 0, len(data)
|
||||
while lo < hi:
|
||||
mid = (lo + hi + 1) // 2
|
||||
candidate = json.dumps(
|
||||
data[:mid], indent=2, ensure_ascii=False
|
||||
)
|
||||
if len(candidate) < budget:
|
||||
lo = mid
|
||||
else:
|
||||
hi = mid - 1
|
||||
sentinel = {
|
||||
"_truncated": True,
|
||||
"total_items": len(data),
|
||||
"shown_items": lo,
|
||||
}
|
||||
formatted = json.dumps(
|
||||
data[:lo] + [sentinel], indent=2, ensure_ascii=False
|
||||
)
|
||||
elif isinstance(data, dict):
|
||||
# Truncate dict entries until the result fits, then add
|
||||
# the _truncated marker. Walk keys in insertion order.
|
||||
DICT_LIMIT = 12000
|
||||
kept: dict = {}
|
||||
for k, v in data.items():
|
||||
candidate = json.dumps(
|
||||
{**kept, k: v, "_truncated": True},
|
||||
indent=2,
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if len(candidate) <= DICT_LIMIT:
|
||||
kept[k] = v
|
||||
else:
|
||||
break
|
||||
formatted = json.dumps(
|
||||
{**kept, "_truncated": True}, indent=2, ensure_ascii=False
|
||||
)
|
||||
else:
|
||||
total = len(full)
|
||||
formatted = full[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
else:
|
||||
formatted = full
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
formatted = response.text
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
elif "text/html" in content_type:
|
||||
formatted = _strip_html_tags(response.text)
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
else:
|
||||
formatted = response.text
|
||||
|
||||
# Truncate
|
||||
if len(formatted) > 12000:
|
||||
formatted = formatted[:12000] + "\n... (truncated)"
|
||||
if len(formatted) > 12000:
|
||||
total = len(formatted)
|
||||
formatted = formatted[:12000] + f"\n... (truncated, {total} chars total)"
|
||||
|
||||
output = f"HTTP {status}\n{formatted}"
|
||||
|
||||
|
||||
+314
-17
@@ -7,6 +7,7 @@ import logging
|
||||
import hashlib
|
||||
import threading
|
||||
import re
|
||||
import os
|
||||
from fastapi import HTTPException
|
||||
from typing import Optional, Dict, List, Tuple
|
||||
from src.model_context import get_context_length, DEFAULT_CONTEXT
|
||||
@@ -22,6 +23,24 @@ class LLMConfig:
|
||||
MAX_RETRIES = 3
|
||||
RETRY_DELAY = 0.5
|
||||
STREAM_TIMEOUT = 300
|
||||
# TCP+TLS connect budget for a SINGLE attempt. The old hard-coded 3.0s
|
||||
# assumed LAN/Tailscale peers ('SYN in <100ms'); it is too tight for public
|
||||
# cloud endpoints (offshore APIs take ~0.5-1.5s cold, with jitter), so a
|
||||
# brief blip on the first connect of an idle chat surfaced as a 503 on the
|
||||
# streaming path (which, unlike llm_call, does not retry the connect). A
|
||||
# genuinely dead upstream stays bounded by the dead-host cooldown. Override
|
||||
# with env LLM_CONNECT_TIMEOUT (seconds).
|
||||
CONNECT_TIMEOUT = float(os.getenv('LLM_CONNECT_TIMEOUT', '10') or '10')
|
||||
|
||||
|
||||
def _call_timeout(read_timeout) -> httpx.Timeout:
    """Build the httpx timeout used for non-streaming LLM requests.

    The connect budget is taken from ``LLMConfig.CONNECT_TIMEOUT``; the read
    budget is ``read_timeout`` coerced to float; write and pool budgets are
    fixed at 10s and 5s.
    """
    limits = {
        "connect": LLMConfig.CONNECT_TIMEOUT,
        "read": float(read_timeout),
        "write": 10.0,
        "pool": 5.0,
    }
    return httpx.Timeout(**limits)
|
||||
|
||||
|
||||
def _stream_timeout(read_timeout) -> httpx.Timeout:
    """Build the httpx timeout used for streaming LLM requests.

    The connect budget is taken from ``LLMConfig.CONNECT_TIMEOUT``; the read
    budget is ``read_timeout`` coerced to float; write and pool budgets are
    fixed at 30s and 5s.
    """
    limits = {
        "connect": LLMConfig.CONNECT_TIMEOUT,
        "read": float(read_timeout),
        "write": 30.0,
        "pool": 5.0,
    }
    return httpx.Timeout(**limits)
|
||||
|
||||
|
||||
# Cache for LLM responses
|
||||
@@ -276,6 +295,24 @@ def _is_ollama_native_url(url: str) -> bool:
|
||||
return local_ollama_host and (path == "" or path == "/api" or path.startswith("/api/"))
|
||||
|
||||
|
||||
def _is_ollama_openai_compat_url(url: str) -> bool:
|
||||
"""Return True for local Ollama's OpenAI-compatible /v1 surface.
|
||||
|
||||
Mirrors the host detection used by ``_is_ollama_native_url`` so that the
|
||||
two helpers stay in lockstep: a localhost Ollama on a non-default port
|
||||
(custom ``OLLAMA_HOST``, reverse proxy, container port remap) is treated
|
||||
the same way here as it is on the native ``/api`` path.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url or "")
|
||||
except Exception:
|
||||
return False
|
||||
host = parsed.hostname or ""
|
||||
path = (parsed.path or "").rstrip("/")
|
||||
local_ollama_host = host in {"localhost", "127.0.0.1", "0.0.0.0", "::1"} or parsed.port == 11434
|
||||
return local_ollama_host and (path == "/v1" or path.startswith("/v1/"))
|
||||
|
||||
|
||||
def _ollama_api_root(url: str) -> str:
|
||||
"""Return a native Ollama API root such as https://ollama.com/api."""
|
||||
url = (url or "").strip().rstrip("/")
|
||||
@@ -405,6 +442,146 @@ def _host_match(url: str, *domains: str) -> bool:
|
||||
return any(host == d or host.endswith("." + d) for d in domains)
|
||||
|
||||
|
||||
# Kimi Code subscription keys (api.kimi.com/coding/v1) require a whitelisted
|
||||
# coding-agent User-Agent; otherwise the API returns 403 access_terminated_error.
|
||||
# Tried in order; first success is cached per base URL for later requests.
|
||||
KIMI_CODE_USER_AGENTS: tuple[str, ...] = (
|
||||
"claude-code/0.1.0",
|
||||
"claude-code/1.0.0",
|
||||
"KimiCLI/1.0",
|
||||
"Kilo-Code/1.0",
|
||||
"Roo-Code/1.0",
|
||||
"Cursor/1.0",
|
||||
)
|
||||
KIMI_CODE_USER_AGENT = KIMI_CODE_USER_AGENTS[0]
|
||||
_kimi_code_ua_cache: dict[str, str] = {}
|
||||
|
||||
|
||||
def _is_kimi_code_url(url: str) -> bool:
    """Return True when ``url`` is on kimi.com and its path contains ``/coding``.

    Empty URLs, non-kimi.com hosts, and URLs that fail to parse all yield False.
    """
    if url and _host_match(url, "kimi.com"):
        try:
            url_path = urlparse(url).path or ""
        except Exception:
            return False
        return "/coding" in url_path
    return False
|
||||
|
||||
|
||||
def _kimi_code_base_key(url: str) -> str:
|
||||
"""Normalize a Kimi Code chat/models URL to its OpenAI base (.../coding/v1)."""
|
||||
parsed = urlparse(url)
|
||||
path = (parsed.path or "").rstrip("/")
|
||||
for suffix in ("/chat/completions", "/models", "/completions"):
|
||||
if path.endswith(suffix):
|
||||
path = path[: -len(suffix)]
|
||||
path = path.rstrip("/") or "/coding/v1"
|
||||
return f"{parsed.scheme}://{parsed.netloc}{path}"
|
||||
|
||||
|
||||
def _is_kimi_code_access_denied(status: int, body: bytes | str) -> bool:
|
||||
if status != 403:
|
||||
return False
|
||||
text = body.decode("utf-8", errors="replace") if isinstance(body, bytes) else (body or "")
|
||||
lower = text.lower()
|
||||
return (
|
||||
"access_terminated_error" in lower
|
||||
or "coding agents" in lower
|
||||
or "only available for coding" in lower
|
||||
)
|
||||
|
||||
|
||||
def _kimi_code_ua_candidates(url: str) -> list[str]:
    """List the User-Agent strings to try for ``url``, best guess first.

    Non-Kimi-Code URLs get an empty list. When a User-Agent is already cached
    for the URL's base key it is placed first, followed by the remaining
    entries of ``KIMI_CODE_USER_AGENTS`` in their declared order.
    """
    if not _is_kimi_code_url(url):
        return []
    preferred = _kimi_code_ua_cache.get(_kimi_code_base_key(url))
    if not preferred:
        return list(KIMI_CODE_USER_AGENTS)
    ordered = [preferred]
    ordered.extend(ua for ua in KIMI_CODE_USER_AGENTS if ua != preferred)
    return ordered
|
||||
|
||||
|
||||
def _remember_kimi_code_user_agent(url: str, user_agent: str) -> None:
    """Cache ``user_agent`` under the normalized base key derived from ``url``."""
    cache_key = _kimi_code_base_key(url)
    _kimi_code_ua_cache[cache_key] = user_agent
|
||||
|
||||
|
||||
def apply_kimi_code_headers(headers: Optional[Dict], url: str) -> Dict[str, str]:
    """Return a copy of ``headers`` with a Kimi Code User-Agent chosen.

    Non-Kimi-Code URLs get an unmodified copy. Otherwise a cached User-Agent
    for the URL's base key is used when present; on a cache miss each entry of
    ``KIMI_CODE_USER_AGENTS`` is probed with a synchronous GET against the
    base's ``/models`` endpoint. The first candidate answered with a status
    below 400 is cached and used. Probing stops at the first response that is
    neither the Kimi "access denied" 403 nor a success; in that case, or when
    every candidate fails, ``KIMI_CODE_USER_AGENT`` is applied only if the
    caller did not already supply a User-Agent.
    """
    result = dict(headers or {})
    if not _is_kimi_code_url(url):
        return result

    base_key = _kimi_code_base_key(url)
    known = _kimi_code_ua_cache.get(base_key)
    if known:
        result["User-Agent"] = known
        return result

    probe_url = base_key.rstrip("/") + "/models"
    from src.tls_overrides import llm_verify

    for candidate in KIMI_CODE_USER_AGENTS:
        probe_headers = {**result, "User-Agent": candidate}
        try:
            resp = httpx.get(probe_url, headers=probe_headers, timeout=8, verify=llm_verify())
        except Exception:
            # Transport failure for this candidate: move on to the next one.
            continue
        if _is_kimi_code_access_denied(resp.status_code, resp.content):
            logger.debug("Kimi Code rejected User-Agent %s (403), trying next", candidate)
            continue
        if resp.status_code < 400:
            _remember_kimi_code_user_agent(url, candidate)
            result["User-Agent"] = candidate
            return result
        # Any other error status is not a User-Agent problem; stop probing.
        break

    result.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
    return result
|
||||
|
||||
|
||||
def httpx_get_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
    """GET ``url``, cycling Kimi Code User-Agents while access is denied.

    Non-Kimi-Code URLs are fetched once with the prepared headers. For Kimi
    Code URLs each candidate User-Agent is tried in order; the first response
    that is not the Kimi "access denied" 403 is returned, and its User-Agent
    is cached when the status is below 400. If every candidate is denied, the
    last response is returned.
    """
    base_headers = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return httpx.get(url, headers=base_headers, **kwargs)

    response = None
    for candidate in _kimi_code_ua_candidates(url):
        response = httpx.get(
            url, headers={**base_headers, "User-Agent": candidate}, **kwargs
        )
        if _is_kimi_code_access_denied(response.status_code, response.content):
            continue
        if response.status_code < 400:
            _remember_kimi_code_user_agent(url, candidate)
        break
    return response
|
||||
|
||||
|
||||
def httpx_post_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
    """POST to ``url``, cycling Kimi Code User-Agents while access is denied.

    Non-Kimi-Code URLs are posted to once with the prepared headers. For Kimi
    Code URLs each candidate User-Agent is tried in order; the first response
    that is not the Kimi "access denied" 403 is returned, and its User-Agent
    is cached when the status is below 400. If every candidate is denied, the
    last response is returned.
    """
    base_headers = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return httpx.post(url, headers=base_headers, **kwargs)

    response = None
    for candidate in _kimi_code_ua_candidates(url):
        response = httpx.post(
            url, headers={**base_headers, "User-Agent": candidate}, **kwargs
        )
        if _is_kimi_code_access_denied(response.status_code, response.content):
            continue
        if response.status_code < 400:
            _remember_kimi_code_user_agent(url, candidate)
        break
    return response
|
||||
|
||||
|
||||
async def httpx_post_kimi_aware_async(client, url: str, headers: Optional[Dict], **kwargs):
    """Async POST via ``client``, cycling Kimi Code User-Agents while denied.

    Non-Kimi-Code URLs are posted to once with the prepared headers. For Kimi
    Code URLs each candidate User-Agent is tried in order; the first response
    that is not the Kimi "access denied" 403 is returned, and its User-Agent
    is cached when the status is below 400. If every candidate is denied, the
    last response is returned.
    """
    # NOTE(review): apply_kimi_code_headers is synchronous and, on a cache
    # miss, issues blocking httpx.get probes — confirm that is acceptable on
    # the event loop.
    base_headers = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return await client.post(url, headers=base_headers, **kwargs)

    response = None
    for candidate in _kimi_code_ua_candidates(url):
        response = await client.post(
            url, headers={**base_headers, "User-Agent": candidate}, **kwargs
        )
        if _is_kimi_code_access_denied(response.status_code, response.content):
            continue
        if response.status_code < 400:
            _remember_kimi_code_user_agent(url, candidate)
        break
    return response
|
||||
|
||||
|
||||
def _detect_provider(url: str) -> str:
|
||||
"""Detect the API provider from a configured endpoint URL.
|
||||
|
||||
@@ -426,6 +603,10 @@ def _detect_provider(url: str) -> str:
|
||||
return "openrouter"
|
||||
if _host_match(url, "groq.com"):
|
||||
return "groq"
|
||||
if _host_match(url, "nvidia.com"):
|
||||
return "nvidia"
|
||||
if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
|
||||
return "moonshot"
|
||||
from src.chatgpt_subscription import is_chatgpt_subscription_base
|
||||
if is_chatgpt_subscription_base(url):
|
||||
return "chatgpt-subscription"
|
||||
@@ -435,6 +616,53 @@ def _detect_provider(url: str) -> str:
|
||||
return "openai"
|
||||
|
||||
|
||||
def _is_self_hosted_openai_compatible(url: str) -> bool:
    """Tell custom/local OpenAI-compatible servers apart from cloud APIs.

    This gates the llama.cpp-server payload extras (``session_id``,
    ``cache_prompt``) used for KV-cache slot affinity (issue #2927). Strict
    cloud providers reject unrecognized top-level fields (api.openai.com
    answers 400, Mistral 422 "extra_forbidden", issue #3793), so an unknown
    OpenAI-compatible host must not be assumed self-hosted.

    A URL qualifies only when all of the following hold:
      * ``_detect_provider`` classifies it as the generic ``"openai"`` kind;
      * its host is not under ``openai.com``;
      * ``is_local_endpoint`` reports it as local (per the original notes:
        loopback/private/tailscale host, or an endpoint explicitly configured
        with kind "local").

    A self-hosted server behind a public hostname therefore loses the
    affinity hint unless its endpoint kind is set to "local" — a lost perf
    hint, versus a hard 4xx on every request the other way round.
    """
    if _detect_provider(url) == "openai" and not _host_match(url, "openai.com"):
        from src.model_context import is_local_endpoint

        return is_local_endpoint(url)
    return False
|
||||
|
||||
|
||||
def _apply_local_cache_affinity(payload: Dict, url: str, session_id: Optional[str]) -> None:
    """Add llama.cpp-server slot-affinity hints to ``payload``, in place.

    Per issue #2927, llama.cpp picks a processing slot by LRU when a request
    carries no stable identifier, so consecutive turns of one chat can land on
    different slots and lose their cached prefix. A stable ``session_id``
    (derived from the Odysseus session) keeps a conversation on the same slot,
    and ``cache_prompt: true`` asks the server to retain and reuse the prefix.

    Both fields are llama.cpp / LM Studio extensions to the OpenAI schema, so
    they are only set when ``session_id`` is truthy and the URL passes
    ``_is_self_hosted_openai_compatible``. Values already present in
    ``payload`` are left untouched.
    """
    if session_id and _is_self_hosted_openai_compatible(url):
        affinity_hints = (
            ("session_id", str(session_id)),
            ("cache_prompt", True),
        )
        for field, value in affinity_hints:
            payload.setdefault(field, value)
|
||||
|
||||
|
||||
def _provider_headers(provider: str, headers: Optional[Dict] = None) -> Dict[str, str]:
|
||||
h = {"Content-Type": "application/json"}
|
||||
if isinstance(headers, dict):
|
||||
@@ -471,9 +699,16 @@ def _provider_label(url: str) -> str:
|
||||
if is_copilot_base(url): return "GitHub Copilot"
|
||||
if _host_match(url, "mistral.ai"): return "Mistral"
|
||||
if _host_match(url, "deepseek.com"): return "DeepSeek"
|
||||
if _host_match(url, "nvidia.com"): return "NVIDIA"
|
||||
if _host_match(url, "googleapis.com"): return "Google"
|
||||
if _host_match(url, "together.xyz", "together.ai"): return "Together"
|
||||
if _host_match(url, "fireworks.ai"): return "Fireworks"
|
||||
if _host_match(url, "kimi.com"):
|
||||
try:
|
||||
if "/coding" in (urlparse(url).path or ""):
|
||||
return "Kimi Code"
|
||||
except Exception:
|
||||
pass
|
||||
if _is_ollama_native_url(url): return "Ollama"
|
||||
try:
|
||||
host = (urlparse(url).hostname or "").lower()
|
||||
@@ -542,8 +777,9 @@ def _build_chatgpt_responses_payload(
|
||||
}
|
||||
if not _restricts_temperature(model):
|
||||
payload["temperature"] = temperature
|
||||
if max_tokens and max_tokens > 0:
|
||||
payload["max_output_tokens"] = max_tokens
|
||||
# ChatGPT Subscription Codex API does not support max_output_tokens —
|
||||
# passing it returns HTTP 400 "Unsupported parameter: max_output_tokens".
|
||||
# Do not include it in the payload.
|
||||
return payload
|
||||
|
||||
|
||||
@@ -613,7 +849,7 @@ def _uses_max_completion_tokens(model: str) -> bool:
|
||||
# perfectly good model as failing. For these models we omit the field and let
|
||||
# the API use its required default. (gpt-4.5 is intentionally excluded — it is
|
||||
# not a reasoning model and accepts temperature normally.)
|
||||
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5")
|
||||
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5", "kimi-for-coding")
|
||||
|
||||
def _restricts_temperature(model: str) -> bool:
|
||||
"""Check if a model rejects any non-default temperature."""
|
||||
@@ -622,6 +858,49 @@ def _restricts_temperature(model: str) -> bool:
|
||||
m = model.lower()
|
||||
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
||||
|
||||
|
||||
# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
|
||||
# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
|
||||
# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
|
||||
# control, so omit temperature and let Moonshot use its default thinking mode.
|
||||
# Keep the gate provider-specific: self-hosted Kimi deployments may accept
|
||||
# custom sampling values, and older Moonshot models have different defaults.
|
||||
def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
|
||||
"""Check if the official Moonshot API fixes temperature for this model."""
|
||||
if provider != "moonshot" or not isinstance(model, str):
|
||||
return False
|
||||
model_id = model.lower().rsplit("/", 1)[-1]
|
||||
return bool(re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", model_id))
|
||||
|
||||
|
||||
def _omit_temperature(provider: str, model: str) -> bool:
    """Decide whether a request should leave temperature to the provider default.

    True when the model itself restricts temperature, or when the official
    Moonshot API fixes it for this provider/model pair.
    """
    fixed_by_model = _restricts_temperature(model)
    return fixed_by_model or _moonshot_rejects_custom_temperature(provider, model)
|
||||
|
||||
|
||||
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
||||
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
|
||||
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
|
||||
# Sonnet/Haiku) still accept temperature in [0.0, 1.0], so the omission must be
|
||||
# version-gated rather than applied to all `claude-*` models.
|
||||
def _anthropic_rejects_temperature(model: str) -> bool:
|
||||
"""Check if a native-Anthropic model rejects the temperature field (Opus 4.7+)."""
|
||||
if not isinstance(model, str) or not model:
|
||||
return False
|
||||
# `(?<![a-z])` anchors "opus" to a word boundary so a substring match like
|
||||
# `oct-opus`/`octopus-4-8` can't be read as Opus (it would otherwise strip
|
||||
# temperature). Cap the minor at 1-2 digits and forbid a trailing digit so a
|
||||
# dated id like `claude-opus-4-20250514` (Opus 4.0) parses as major-only (no
|
||||
# minor match, kept) instead of reading the date `20250514` as a giant minor
|
||||
# that would falsely test >= 4.7. Dated 4.7+ snapshots (`claude-opus-4-7-
|
||||
# 20260201`) keep their explicit minor and are still matched.
|
||||
match = re.search(r"(?<![a-z])opus[-_]?(\d+)[-_.](\d{1,2})(?!\d)", model.lower())
|
||||
if not match:
|
||||
return False
|
||||
return (int(match.group(1)), int(match.group(2))) >= (4, 7)
|
||||
|
||||
# Models that support structured thinking — may output </think> without opening tag
|
||||
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
|
||||
|
||||
@@ -725,8 +1004,11 @@ def _build_anthropic_payload(model, messages, temperature, max_tokens, stream=Fa
|
||||
"model": model,
|
||||
"messages": chat_messages,
|
||||
"max_tokens": max_tokens if max_tokens and max_tokens > 0 else 4096,
|
||||
"temperature": temperature,
|
||||
}
|
||||
# Opus 4.7+ removed the sampling parameters — sending `temperature` (even 0.0)
|
||||
# returns HTTP 400. Omit it for those models; older Claude models still take it.
|
||||
if not _anthropic_rejects_temperature(model):
|
||||
payload["temperature"] = temperature
|
||||
if system_parts:
|
||||
system_text = "\n\n".join(system_parts)
|
||||
# Send `system` as a structured text block so we can attach a prompt-cache
|
||||
@@ -810,7 +1092,7 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
||||
(content=None, since Gemini/Ollama reject tool_calls alongside ""). Dropping
|
||||
it leaves the tool result dangling and breaks the next round.
|
||||
"""
|
||||
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call"}
|
||||
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call", "reasoning_content"}
|
||||
cleaned = []
|
||||
for msg in messages or []:
|
||||
if not isinstance(msg, dict):
|
||||
@@ -1045,7 +1327,7 @@ def list_model_ids(
|
||||
from src.endpoint_resolver import build_models_url
|
||||
|
||||
models_url = build_models_url(base_chat_url)
|
||||
r = httpx.get(models_url, headers=h, timeout=timeout)
|
||||
r = httpx_get_kimi_aware(models_url, h, timeout=timeout)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
|
||||
@@ -1146,14 +1428,14 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
|
||||
"messages": messages_copy,
|
||||
"temperature": temperature,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if max_tokens and max_tokens > 0:
|
||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||
payload[tok_key] = max_tokens
|
||||
try:
|
||||
note_model_activity(target_url, model)
|
||||
r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
|
||||
r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout)
|
||||
except Exception as e:
|
||||
raise HTTPException(502, f"POST {target_url} failed: {e}")
|
||||
if not r.is_success:
|
||||
@@ -1247,7 +1529,8 @@ async def llm_call_async(
|
||||
headers: Optional[Dict] = None,
|
||||
timeout: int = LLMConfig.STREAM_TIMEOUT,
|
||||
max_retries: int = LLMConfig.MAX_RETRIES,
|
||||
prompt_type: Optional[str] = None
|
||||
prompt_type: Optional[str] = None,
|
||||
session_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""Asynchronous LLM call using httpx with connection pooling, timeout, retry logic, and performance logging."""
|
||||
provider = _detect_provider(url)
|
||||
@@ -1339,16 +1622,20 @@ async def llm_call_async(
|
||||
"messages": messages_copy,
|
||||
"temperature": temperature,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if max_tokens and max_tokens > 0:
|
||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||
payload[tok_key] = max_tokens
|
||||
# Suppress thinking for qwen3/gemma4 on Ollama /v1 — same as stream_llm.
|
||||
if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
|
||||
payload["think"] = False
|
||||
_apply_local_cache_affinity(payload, url, session_id)
|
||||
|
||||
if _is_host_dead(target_url):
|
||||
raise HTTPException(503, f"Upstream {_host_key(target_url)} marked unreachable (cooldown active)")
|
||||
|
||||
call_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=10.0, pool=5.0)
|
||||
call_timeout = _call_timeout(timeout)
|
||||
attempt = 0
|
||||
while attempt < max_retries:
|
||||
attempt += 1
|
||||
@@ -1356,7 +1643,7 @@ async def llm_call_async(
|
||||
try:
|
||||
note_model_activity(target_url, model)
|
||||
client = _get_http_client()
|
||||
r = await client.post(target_url, headers=h, json=payload, timeout=call_timeout)
|
||||
r = await httpx_post_kimi_aware_async(client, target_url, h, json=payload, timeout=call_timeout)
|
||||
duration = time.time() - start
|
||||
if not r.is_success:
|
||||
friendly = _format_upstream_error(r.status_code, r.text, target_url)
|
||||
@@ -1401,7 +1688,7 @@ async def llm_call_async(
|
||||
async def stream_llm(url: str, model: str, messages: List[Dict], temperature: float = LLMConfig.DEFAULT_TEMPERATURE,
|
||||
max_tokens: int = LLMConfig.DEFAULT_MAX_TOKENS, headers: Optional[Dict] = None,
|
||||
timeout: int = LLMConfig.STREAM_TIMEOUT, prompt_type: Optional[str] = None,
|
||||
tools: Optional[List[Dict]] = None):
|
||||
tools: Optional[List[Dict]] = None, session_id: Optional[str] = None):
|
||||
"""Stream LLM responses with improved error handling.
|
||||
|
||||
Yields SSE chunks:
|
||||
@@ -1452,7 +1739,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
"temperature": temperature,
|
||||
"stream": True,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if provider not in {"openrouter", "groq"}:
|
||||
payload["stream_options"] = {"include_usage": True}
|
||||
@@ -1461,14 +1748,23 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
payload[tok_key] = max_tokens
|
||||
if tools:
|
||||
payload["tools"] = tools
|
||||
# For Ollama's OpenAI-compat /v1 endpoint with thinking models (qwen3,
|
||||
# gemma4, etc.), suppress thinking so tool calls aren't swallowed inside
|
||||
# <think> blocks. Ollama /v1 accepts "think": false as a top-level param.
|
||||
if _is_ollama_openai_compat_url(url) and _supports_thinking(model):
|
||||
payload["think"] = False
|
||||
_apply_local_cache_affinity(payload, url, session_id)
|
||||
h = _provider_headers(provider, headers)
|
||||
if provider == "copilot":
|
||||
from src.copilot import apply_request_headers
|
||||
apply_request_headers(h, messages_copy)
|
||||
|
||||
# Short connect timeout: a reachable peer answers SYN in <100ms even on
|
||||
# Tailscale. 3s is plenty; 30s let one dead upstream wedge the UI.
|
||||
stream_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=30.0, pool=5.0)
|
||||
# Connect budget from LLMConfig.CONNECT_TIMEOUT (env LLM_CONNECT_TIMEOUT).
|
||||
# The dead-host cooldown still bounds a genuinely unreachable upstream, so a
|
||||
# wider connect budget only affects first contact and stops a brief cold
|
||||
# connect blip (offshore/public endpoints) surfacing as a 503 on this stream
|
||||
# path, which -- unlike llm_call -- does not retry the connect.
|
||||
stream_timeout = _stream_timeout(timeout)
|
||||
|
||||
if _is_host_dead(target_url):
|
||||
yield f'event: error\ndata: {json.dumps({"error": f"Upstream {_host_key(target_url)} unreachable (cooldown active)", "status": 503})}\n\n'
|
||||
@@ -1744,6 +2040,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
events.append(_stream_delta_event(part))
|
||||
return events
|
||||
|
||||
h = apply_kimi_code_headers(h, target_url)
|
||||
try:
|
||||
client = _get_http_client()
|
||||
async with client.stream('POST', target_url, json=payload, headers=h, timeout=stream_timeout) as r:
|
||||
|
||||
+82
-34
@@ -5,6 +5,7 @@ Query and cache model context window sizes from OpenAI-compatible APIs.
|
||||
Provides token estimation for context usage tracking.
|
||||
"""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import sys
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
@@ -19,7 +20,20 @@ _LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.interna
|
||||
_PRIVATE_PREFIXES = ("10.", "172.16.", "172.17.", "172.18.", "172.19.",
|
||||
"172.20.", "172.21.", "172.22.", "172.23.", "172.24.",
|
||||
"172.25.", "172.26.", "172.27.", "172.28.", "172.29.",
|
||||
"172.30.", "172.31.", "192.168.", "100.")
|
||||
"172.30.", "172.31.", "192.168.")
|
||||
|
||||
# Tailscale uses the CGNAT range 100.64.0.0/10, NOT all of 100.0.0.0/8.
|
||||
# A bare "100." prefix would classify public addresses (e.g. AWS ranges
|
||||
# under 100.x outside the CGNAT block) as local; routes/model_routes.py
|
||||
# already narrows this the same way for endpoint classification.
|
||||
_TAILSCALE_CGNAT = ipaddress.ip_network("100.64.0.0/10")
|
||||
|
||||
|
||||
def _in_tailscale_range(host: str) -> bool:
|
||||
try:
|
||||
return ipaddress.ip_address(host) in _TAILSCALE_CGNAT
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _normalize_base_for_compare(url: str) -> str:
|
||||
@@ -64,7 +78,7 @@ def _configured_endpoint_kind(url: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _is_local_endpoint(url: str) -> bool:
|
||||
def is_local_endpoint(url: str) -> bool:
|
||||
"""Check if URL points to a local/private/tailscale address."""
|
||||
kind = _configured_endpoint_kind(url)
|
||||
if kind in ("api", "proxy"):
|
||||
@@ -73,7 +87,7 @@ def _is_local_endpoint(url: str) -> bool:
|
||||
return True
|
||||
try:
|
||||
host = urlparse(url).hostname or ""
|
||||
return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES)
|
||||
return host in _LOCAL_HOSTS or host.startswith(_PRIVATE_PREFIXES) or _in_tailscale_range(host)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
@@ -208,7 +222,30 @@ KNOWN_CONTEXT_WINDOWS = {
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache
|
||||
# ---------------------------------------------------------------------------
|
||||
_context_cache: Dict[Tuple[str, str], int] = {}
|
||||
_context_cache: Dict[Tuple[str, str], Tuple[int, bool]] = {}
|
||||
|
||||
|
||||
def _get_context_length_cached(endpoint_url: str, model: str) -> Tuple[int, bool]:
|
||||
"""Return (context_length, known). ``known`` is False only when the value is a
|
||||
bare DEFAULT_CONTEXT fallback (no endpoint report and not in the known table)."""
|
||||
configured_kind = _configured_endpoint_kind(endpoint_url)
|
||||
is_local = is_local_endpoint(endpoint_url)
|
||||
# Key on (endpoint_url, model): the same model id can be served by two
|
||||
# different remote endpoints with different real context windows (e.g. a
|
||||
# capped proxy vs. the full provider), so caching by model id alone would
|
||||
# serve one endpoint's window for the other (issue #2603).
|
||||
cache_key = (endpoint_url, model)
|
||||
# Cache hits are served only for non-local endpoints; a local endpoint
# always falls through to a fresh query below.
if not is_local and cache_key in _context_cache:
|
||||
return _context_cache[cache_key]
|
||||
|
||||
# Cache miss or local endpoint: ask _query_context_length for the window
# together with its ``known`` flag.
ctx, known = _query_context_length(endpoint_url, model)
|
||||
# Only cache non-default values to allow retry on next request.
|
||||
# Local endpoints can restart with a different --max-model-len while keeping
|
||||
# the same model id, so always re-query them instead of serving stale cache.
|
||||
# A DEFAULT_CONTEXT result is still cached when the endpoint is configured as
# "api" or "proxy". The stored value is the (ctx, known) pair, so a later hit
# returns the flag alongside the window it describes.
if not is_local and (ctx != DEFAULT_CONTEXT or configured_kind in ("api", "proxy")):
|
||||
_context_cache[cache_key] = (ctx, known)
|
||||
logger.info(f"Context length for {model}: {ctx}")
|
||||
return ctx, known
|
||||
|
||||
|
||||
def get_context_length(endpoint_url: str, model: str) -> int:
|
||||
@@ -218,24 +255,33 @@ def get_context_length(endpoint_url: str, model: str) -> int:
|
||||
or context_window fields. Caches result per (endpoint, model).
|
||||
Falls back to DEFAULT_CONTEXT if unavailable.
|
||||
"""
|
||||
configured_kind = _configured_endpoint_kind(endpoint_url)
|
||||
is_local = _is_local_endpoint(endpoint_url)
|
||||
# Key on (endpoint_url, model): the same model id can be served by two
|
||||
# different remote endpoints with different real context windows (e.g. a
|
||||
# capped proxy vs. the full provider), so caching by model id alone would
|
||||
# serve one endpoint's window for the other (issue #2603).
|
||||
cache_key = (endpoint_url, model)
|
||||
if not is_local and cache_key in _context_cache:
|
||||
return _context_cache[cache_key]
|
||||
return _get_context_length_cached(endpoint_url, model)[0]
|
||||
|
||||
ctx = _query_context_length(endpoint_url, model)
|
||||
# Only cache non-default values to allow retry on next request.
|
||||
# Local endpoints can restart with a different --max-model-len while keeping
|
||||
# the same model id, so always re-query them instead of serving stale cache.
|
||||
if not is_local and (ctx != DEFAULT_CONTEXT or configured_kind in ("api", "proxy")):
|
||||
_context_cache[cache_key] = ctx
|
||||
logger.info(f"Context length for {model}: {ctx}")
|
||||
return ctx
|
||||
|
||||
def get_context_length_known(endpoint_url: str, model: str) -> Tuple[int, bool]:
    """Return the context window together with a flag saying whether it is proven.

    Same lookup as ``get_context_length``, but the result is the
    ``(context_length, known)`` pair. ``known`` is True when the window was
    actually discovered (reported by the endpoint or found in the known-models
    table) and False when it is only the bare DEFAULT_CONTEXT fallback. Callers
    that *scale* a budget off the window must not trust an unknown value: a
    fallback 128K isn't proof the model holds 128K (review on #4122).
    """
    window_and_flag = _get_context_length_cached(endpoint_url, model)
    return window_and_flag
|
||||
|
||||
|
||||
def budget_context_for_model(endpoint_url: str, model: str, *, fallback: int = 0) -> int:
    """Pick the context window the agent input budget may be scaled against.

    The window comes from ``get_context_length_known`` and is returned only
    when that same lookup marks it as proven (endpoint-reported or from the
    known table); an unproven window yields 0 so auto-scaling stays
    conservative. The ``known`` flag is thereby bound to the value it proves.
    Callers must not pair it with a context length from a *different* lookup
    (a stale local re-query, or a caller that didn't pass one), which would
    budget off an unproven number (review on #4122).

    If the lookup raises, ``fallback`` (the caller's best-known value) is
    returned instead, preserving prior behaviour.
    """
    try:
        window, proven = get_context_length_known(endpoint_url, model)
        if not proven:
            return 0
        return window
    except Exception:
        # Deliberate best-effort: a failed probe must not break budgeting.
        return fallback
|
||||
|
||||
|
||||
def _lookup_known(model: str) -> Optional[int]:
|
||||
@@ -257,8 +303,9 @@ def _lookup_known(model: str) -> Optional[int]:
|
||||
return best_ctx
|
||||
|
||||
|
||||
def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
"""Query the model API for context length."""
|
||||
def _query_context_length(endpoint_url: str, model: str) -> Tuple[int, bool]:
|
||||
"""Query the model API for context length. Returns (context_length, known) where
|
||||
``known`` is False only for the bare DEFAULT_CONTEXT fallback."""
|
||||
known = _lookup_known(model)
|
||||
api_ctx = None
|
||||
configured_kind = _configured_endpoint_kind(endpoint_url)
|
||||
@@ -269,11 +316,11 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
if configured_kind in ("api", "proxy"):
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known
|
||||
return DEFAULT_CONTEXT
|
||||
return known, True
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
# Try llama.cpp /slots endpoint first — reports actual serving context
|
||||
if _is_local_endpoint(endpoint_url):
|
||||
if is_local_endpoint(endpoint_url):
|
||||
try:
|
||||
base = endpoint_url.split("/v1")[0] if "/v1" in endpoint_url else endpoint_url.rsplit("/", 1)[0]
|
||||
r = httpx.get(f"{base}/slots", timeout=REQUEST_TIMEOUT)
|
||||
@@ -283,7 +330,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
n_ctx = slots[0].get("n_ctx")
|
||||
if n_ctx and isinstance(n_ctx, int) and n_ctx > 0:
|
||||
logger.info(f"llama.cpp /slots reports n_ctx={n_ctx} for {model}")
|
||||
return n_ctx
|
||||
return n_ctx, True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -295,7 +342,8 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
if is_copilot_base(endpoint_url):
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known or DEFAULT_CONTEXT
|
||||
return known, True
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
from src.endpoint_resolver import build_models_url
|
||||
|
||||
@@ -337,21 +385,21 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
# For local/self-hosted endpoints, trust the API value (user set --max-model-len)
|
||||
# For cloud APIs, use the larger value (API can report low defaults)
|
||||
if api_ctx and known:
|
||||
_is_local = _is_local_endpoint(endpoint_url)
|
||||
_is_local = is_local_endpoint(endpoint_url)
|
||||
if _is_local and api_ctx < known:
|
||||
logger.info(f"Local endpoint reports {api_ctx} for {model} (known max: {known}) — using API value")
|
||||
return api_ctx
|
||||
return api_ctx, True
|
||||
result = max(api_ctx, known)
|
||||
if api_ctx < known:
|
||||
logger.info(f"API reported {api_ctx} for {model}, using known {known} instead")
|
||||
return result
|
||||
return result, True
|
||||
if api_ctx:
|
||||
return api_ctx
|
||||
return api_ctx, True
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known
|
||||
return known, True
|
||||
|
||||
return DEFAULT_CONTEXT
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
|
||||
def estimate_tokens(messages: List[Dict]) -> int:
|
||||
|
||||
@@ -223,6 +223,25 @@ class ModelDiscovery:
|
||||
)
|
||||
return {"hosts": hosts, "items": items}
|
||||
|
||||
def warmup_ping_urls(self, limit: int = 5) -> List[str]:
    """The ``/models`` URLs of up to ``limit`` discovered endpoints.

    Used by the startup warmup / keepalive loop to prime connections. Each
    discovered item already carries a ``/v1/chat/completions`` url; swap the
    suffix for the cheap ``/models`` probe.

    Only the first ``limit`` discovered items are inspected. Items that are
    not dicts, or whose ``url`` is missing, empty or not a string, are skipped.
    Any failure of discovery, or an unexpected result shape, degrades to an
    empty list so warmup never crashes the caller.
    """
    try:
        # `or []` also covers an explicit `"items": None` in the result.
        items = (self.discover_models() or {}).get("items") or []
    except Exception:
        return []
    if not isinstance(items, (list, tuple)):
        # Not sliceable as a sequence of entries; treat as nothing discovered.
        return []

    urls: List[str] = []
    for ep in items[:limit]:
        if not isinstance(ep, dict):
            continue
        raw_url = ep.get("url")
        if not isinstance(raw_url, str) or not raw_url:
            continue
        urls.append(raw_url.replace("/chat/completions", "/models"))
    return urls
|
||||
|
||||
def get_providers(self) -> Dict[str, Any]:
|
||||
"""Get all available providers"""
|
||||
discovery = self.discover_models()
|
||||
|
||||
+1
-1
@@ -32,7 +32,7 @@ def create_office_document(
|
||||
DocumentVersion,
|
||||
Session as DbSession,
|
||||
)
|
||||
from src.tool_implementations import set_active_document
|
||||
from src.agent_tools.document_tools import set_active_document
|
||||
|
||||
if not body_text or not body_text.strip():
|
||||
return None
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
"""Compatibility helpers for optional third-party dependencies."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
|
||||
|
||||
def patch_realesrgan_torchvision_compat() -> None:
    """Register a stand-in for ``torchvision.transforms.functional_tensor``.

    BasicSR/Real-ESRGAN import that module path. When it is not already in
    ``sys.modules``, a synthetic module is installed there that exposes
    ``rgb_to_grayscale`` and forwards every other attribute lookup to
    ``torchvision.transforms.functional``.

    The function is a no-op when the module is already registered, when
    ``torchvision.transforms.functional`` cannot be imported, or when that
    module has no ``rgb_to_grayscale``.
    """
    legacy_path = "torchvision.transforms.functional_tensor"
    if legacy_path in sys.modules:
        return

    try:
        from torchvision.transforms import functional
    except Exception:
        # torchvision missing or broken: nothing to shim.
        return

    grayscale = getattr(functional, "rgb_to_grayscale", None)
    if grayscale is None:
        return

    def _forward(name):
        # Module-level __getattr__: consulted only for names the shim lacks.
        return getattr(functional, name)

    shim = types.ModuleType(legacy_path)
    shim.rgb_to_grayscale = grayscale
    shim.__getattr__ = _forward
    sys.modules[legacy_path] = shim
|
||||
|
||||
|
||||
def prepare_optional_dependency_import(name: str) -> None:
    """Install any import-time compatibility shim that package ``name`` needs.

    Only ``"realesrgan"`` has a shim (the torchvision patch); every other
    name is a no-op.
    """
    if name != "realesrgan":
        return
    patch_realesrgan_torchvision_compat()
|
||||
+2
-2
@@ -219,7 +219,7 @@ def create_plain_pdf_document(
|
||||
pages without form-field overlays.
|
||||
"""
|
||||
from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession
|
||||
from src.tool_implementations import set_active_document
|
||||
from src.agent_tools.document_tools import set_active_document
|
||||
|
||||
content = render_plain_pdf_markdown(upload_id, title, body_text)
|
||||
db = SessionLocal()
|
||||
@@ -402,7 +402,7 @@ def create_form_markdown_document(
|
||||
inside the content, which the export route looks for.
|
||||
"""
|
||||
from src.database import SessionLocal, Document, DocumentVersion, Session as DbSession
|
||||
from src.tool_implementations import set_active_document
|
||||
from src.agent_tools.document_tools import set_active_document
|
||||
|
||||
content = render_form_as_markdown(fields, upload_id, title, intro_text=intro_text)
|
||||
db = SessionLocal()
|
||||
|
||||
+24
-1
@@ -221,6 +221,22 @@ class ResearchHandler:
|
||||
# Task registry — background research with persistence
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def rename_owner(self, old_owner: str, new_owner: str) -> int:
    """Move in-flight research tasks from one owner key to another.

    Owner keys are compared after string conversion, whitespace stripping and
    lower-casing. Only dict entries of ``self._active_tasks`` are considered;
    matching entries get their ``"owner"`` set to the normalised new key.

    Returns the number of entries rewritten, or 0 when either key is empty
    after normalisation.
    """
    old_key = str(old_owner or "").strip().lower()
    new_key = str(new_owner or "").strip().lower()
    if not old_key or not new_key:
        return 0

    changed = 0
    # Iterate over a snapshot of the values so the registry may change size
    # while we walk it.
    for entry in list(self._active_tasks.values()):
        if not isinstance(entry, dict):
            continue
        # Normalise the stored owner exactly like the arguments: a missing or
        # None owner becomes "", not the string "none", so an account that is
        # literally named "none" cannot claim unowned tasks.
        if str(entry.get("owner") or "").strip().lower() == old_key:
            entry["owner"] = new_key
            changed += 1
    return changed
|
||||
|
||||
def start_research(
|
||||
self,
|
||||
session_id: str,
|
||||
@@ -390,7 +406,6 @@ class ResearchHandler:
|
||||
|
||||
def get_status(self, session_id: str) -> Optional[dict]:
|
||||
"""Get current research status for a session."""
|
||||
avg = self.get_avg_duration()
|
||||
if session_id in self._active_tasks:
|
||||
entry = self._active_tasks[session_id]
|
||||
result = {
|
||||
@@ -399,6 +414,14 @@ class ResearchHandler:
|
||||
"query": entry["query"],
|
||||
"started_at": entry["started_at"],
|
||||
}
|
||||
# avg_duration is a historical figure over completed reports on
|
||||
# disk; get_avg_duration() globs and JSON-parses the whole research
|
||||
# dir, so compute it at most once per active stream (memoized on the
|
||||
# entry) instead of on every ~1s SSE poll. The disk branch below
|
||||
# never used it, so it no longer pays that cost at all.
|
||||
if "_avg_duration" not in entry:
|
||||
entry["_avg_duration"] = self.get_avg_duration()
|
||||
avg = entry["_avg_duration"]
|
||||
if avg is not None:
|
||||
result["avg_duration"] = round(avg, 1)
|
||||
return result
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user