Compare commits
362 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bd0c67b6d3 | |||
| ff5bcd9864 | |||
| 270b8570fc | |||
| 0750486654 | |||
| d38e2cbc07 | |||
| 7fd937fa57 | |||
| c41caac438 | |||
| 1747c13133 | |||
| ffd0aaf69b | |||
| 81e7074d93 | |||
| f66a23d19d | |||
| f602819523 | |||
| 85a773ea02 | |||
| fb0a64fe4f | |||
| bcf46dafb9 | |||
| b118c33e37 | |||
| da74cc23e4 | |||
| d792b61722 | |||
| 1faadf7e10 | |||
| e87b44126c | |||
| 62476ddb55 | |||
| e899817969 | |||
| 1cc9a003fd | |||
| f7aa2de410 | |||
| 514d345334 | |||
| 6d507f8128 | |||
| 2cbd55b8bd | |||
| cd02ac7ef6 | |||
| e7abb7559d | |||
| 172a8ea7b0 | |||
| 2adae2bbba | |||
| f5d3e5098a | |||
| 4ee5ed4dce | |||
| f2bfe9b91f | |||
| 3f3c05e8c2 | |||
| 2e9f641c2c | |||
| 627a52ac44 | |||
| 397fce6e32 | |||
| 10cc2295e5 | |||
| 933ec8fec9 | |||
| 8fe98cf471 | |||
| 55b4a5e6ff | |||
| 3c0e9fcb25 | |||
| d5de061656 | |||
| 8b157f452c | |||
| daec3604f3 | |||
| e75a52efbb | |||
| f28703adf6 | |||
| d8e7cc7053 | |||
| f7e2d0c0b7 | |||
| b5a7d5ccda | |||
| f7a5047228 | |||
| 4ccb7c4890 | |||
| 1aa5ffb57c | |||
| 0939983ddf | |||
| ffc0f1dccc | |||
| 57646300a4 | |||
| f23e2e6ffb | |||
| 955455b797 | |||
| 674457384a | |||
| 2cf8bd14ae | |||
| a172522d87 | |||
| cd41de8043 | |||
| fb9e023381 | |||
| 65c7321ace | |||
| 2966ad6ef6 | |||
| d6a3c9a0fe | |||
| 7ebbc15377 | |||
| 23837f4571 | |||
| b28aa1f2c4 | |||
| 33c26bab88 | |||
| e52d078ea1 | |||
| 7ae6133d7f | |||
| 589fcd314a | |||
| 5e0cdb6cbb | |||
| 039431f5ea | |||
| aac589ee49 | |||
| 8cff1f87ee | |||
| ec4f91afdd | |||
| 268bc1d1a6 | |||
| 7f571c8f7e | |||
| 056d1fb960 | |||
| faf27c4a90 | |||
| ebbcdc15af | |||
| 4b0a977988 | |||
| 29180c4731 | |||
| 54690997ec | |||
| be046dd29a | |||
| 4d070ef4cb | |||
| 59af91cb22 | |||
| e39c9fbbd5 | |||
| ece6cebc03 | |||
| 4c41834dc7 | |||
| 96052c5e8a | |||
| afc81bdd7b | |||
| 71ccd59b54 | |||
| b20cea347a | |||
| a07fe35936 | |||
| a7766d0b7f | |||
| 6824fbb729 | |||
| f14ea6d67d | |||
| 59efa8a44b | |||
| dbd1e6572f | |||
| 2857723e47 | |||
| 011e6b07a5 | |||
| 4e0b65491e | |||
| a633611823 | |||
| 6d756215a2 | |||
| 7dedc51d9f | |||
| 9fd85f67e8 | |||
| 21ff44e9e8 | |||
| 2e99825a29 | |||
| 1fcec32a3c | |||
| 768bcb565a | |||
| 63b4ad2e9c | |||
| d70eb99a0d | |||
| d44de3af43 | |||
| 25dd94234c | |||
| 600fa6be8a | |||
| 781a3ee829 | |||
| a9de61771a | |||
| 9873f9b44f | |||
| 09a82852c0 | |||
| 4074e77d93 | |||
| d3944be1be | |||
| ce964b9a00 | |||
| 1d7d9c5e9c | |||
| adac89c8e2 | |||
| 65a2e51af8 | |||
| 04a97adbb3 | |||
| 8829ae2675 | |||
| 09a1718103 | |||
| f03a9e79a7 | |||
| bb66914b1e | |||
| 8053d6a50a | |||
| 7cbf5a2c00 | |||
| 0895c70fc9 | |||
| 16c41612ca | |||
| 7ef3e353c6 | |||
| 10b9e6b81f | |||
| 360ce696e0 | |||
| 0548d335d4 | |||
| 79d55b46a6 | |||
| 93c0529e00 | |||
| a29c2b25d0 | |||
| 654f9f82c7 | |||
| 45b3cd15df | |||
| d006e38a2f | |||
| 438db357ff | |||
| 3ff4eb5519 | |||
| f34cb42b07 | |||
| ac4de93928 | |||
| 6763fe4d44 | |||
| 44a60c1261 | |||
| f09f606bec | |||
| e6349c016e | |||
| e630605aef | |||
| 74e563dabc | |||
| ae0b29af3d | |||
| d68c75a82c | |||
| a615f7f786 | |||
| 0808de0b3b | |||
| aba3a7ae43 | |||
| fa3adca5fc | |||
| f78084c230 | |||
| 7004e1de7b | |||
| e2a30c0600 | |||
| eb0abe7c90 | |||
| c822d34ce6 | |||
| 0889eb4e01 | |||
| 77f00eeab1 | |||
| 86daf254cf | |||
| 9ea3a250db | |||
| c537d2b95c | |||
| f538da9a8e | |||
| 015aeb1fab | |||
| 0d27480719 | |||
| 81a9a1fed3 | |||
| a01ca5a0a1 | |||
| 3239430996 | |||
| 65ead1f799 | |||
| 6cc45a4f77 | |||
| f6c4c9a67c | |||
| 10a25f5959 | |||
| a57327c13f | |||
| 37e49246a6 | |||
| 0351e5e166 | |||
| 98c05dd08d | |||
| 4811af7ab2 | |||
| ba17829202 | |||
| 8f696064d5 | |||
| 3819a23344 | |||
| cedc38fee8 | |||
| 198af4709d | |||
| 696ff78302 | |||
| f2da86b455 | |||
| 5212758698 | |||
| 9e73912d24 | |||
| 6d328b1ad7 | |||
| 27c92caee8 | |||
| 85966881d3 | |||
| dc170b1f58 | |||
| 37269fd96a | |||
| e832133e47 | |||
| 51a41c0c30 | |||
| 8b8ec7fb1d | |||
| 8f4747b1ff | |||
| be7b3d796c | |||
| 760c8ef72c | |||
| 3c4fb62d3a | |||
| cc86c3dd04 | |||
| 32898a68eb | |||
| 55e438d18c | |||
| a653f74cab | |||
| 9d7a3d66c0 | |||
| 4913a1363b | |||
| 6edcc07c1b | |||
| 7369c7c642 | |||
| 7db4e8df4a | |||
| 5d5cfc07d7 | |||
| d592b1e6af | |||
| b5449ea3f9 | |||
| 73dbf3cde7 | |||
| debd2cd386 | |||
| d95abaff1b | |||
| 13086c3662 | |||
| 5719e4db5f | |||
| 9ac3f40955 | |||
| 3a5c58da75 | |||
| 7cf3402ef4 | |||
| 6066d0af02 | |||
| 7e029db44a | |||
| f569b9394e | |||
| fce9942ae0 | |||
| 93ae65f99f | |||
| f8d3890e6a | |||
| 85a11ad416 | |||
| 2be0c5c892 | |||
| e0af7bd8a0 | |||
| 1d1678214a | |||
| 06899c669c | |||
| 05f05dd372 | |||
| a195f4f194 | |||
| 28caa40e68 | |||
| 6c1ce446f5 | |||
| 729494a59b | |||
| df69bced42 | |||
| 12c8f9637f | |||
| 7fe8a70032 | |||
| 2e8e097683 | |||
| 24dfd04964 | |||
| 86965950ac | |||
| 79e9225c68 | |||
| 1a3880347f | |||
| 20968d5a87 | |||
| a7200dd39b | |||
| d1f732bae1 | |||
| d849189b8c | |||
| d30b2d11e6 | |||
| 156009f9ad | |||
| 0aa8d17d6c | |||
| 39331fafb5 | |||
| 05f87b0f50 | |||
| 9f1435f761 | |||
| 772ddf4a86 | |||
| 432b41cede | |||
| e7466175ef | |||
| 5bf7caecc9 | |||
| 4bf389ed09 | |||
| 90acad0d4b | |||
| 6e6b860f04 | |||
| e4c7a3aad9 | |||
| ac4627b69d | |||
| 99660e1c6d | |||
| f91f37ef70 | |||
| 682ec11003 | |||
| 41c0ffbb52 | |||
| be430fc4a4 | |||
| 15f2b106ab | |||
| e310336a42 | |||
| e1585aa4aa | |||
| 6a392542f3 | |||
| 7b3bc598f4 | |||
| 239cc02422 | |||
| 44f12f266e | |||
| 8e8ce8ddd6 | |||
| f2ccf8b21f | |||
| 5d9d21f227 | |||
| 537f492762 | |||
| 6a0a7622fd | |||
| 719867a819 | |||
| 9dfea188bf | |||
| df908b4c11 | |||
| be126afcf8 | |||
| b2243efd3f | |||
| 79c04c71e9 | |||
| ebd2332db4 | |||
| 070ec4c711 | |||
| 6fc79e90ac | |||
| f5ad59317c | |||
| 803df21fc2 | |||
| df47536b8d | |||
| 2049eb7713 | |||
| f42cee8512 | |||
| 8a00f954a9 | |||
| 6d1d626d87 | |||
| 8632072ce0 | |||
| c637b5057b | |||
| 153b788134 | |||
| bc2d934b94 | |||
| 2b1e2e9e20 | |||
| b5b96980e3 | |||
| 127745d13b | |||
| 5ec1e12a50 | |||
| 7c1af0385a | |||
| dde2d25804 | |||
| 7f71fbc3ea | |||
| 7017127a11 | |||
| 00643b5a4b | |||
| e25c279e4b | |||
| df54d8d2bf | |||
| 8ae31aeb13 | |||
| cc86760a26 | |||
| 2e7cfbe1fa | |||
| 9dbe31bfb0 | |||
| 2bf372b41c | |||
| a86990fc58 | |||
| f4c1b264c6 | |||
| 031a600725 | |||
| b385b25d5f | |||
| 49b72bd09c | |||
| 0a3333b961 | |||
| 1638db9c86 | |||
| cd9ad1a7f2 | |||
| 023f1ba575 | |||
| 1a4659b7fc | |||
| 965b0e143c | |||
| 1eca28e588 | |||
| a80421efb6 | |||
| 89efd7d44b | |||
| 41980df6f1 | |||
| baa4449a03 | |||
| 1ee51be420 | |||
| 94931ba59f | |||
| 49ecd806a2 | |||
| 1eaa5c2a81 | |||
| e107c5876e | |||
| 4f7061fd61 | |||
| 7690860ab1 | |||
| b6366e9da5 | |||
| 64122269e9 | |||
| 1bdd515941 | |||
| 8ac0ae72dc | |||
| b2458f9891 | |||
| 2252776a97 | |||
| c9fecd53dc | |||
| 75268e7f43 | |||
| 8ef9b8b215 | |||
| 459b825daa | |||
| 3247773447 | |||
| 013beab861 | |||
| c5230e85a9 |
@@ -10,6 +10,12 @@ dist/
|
||||
build/
|
||||
.env
|
||||
.env.bak.*
|
||||
# Secrets: keep plaintext and every transient secrets.env variant out of
|
||||
# the build context. If an encrypted secrets.env is used, it is mounted
|
||||
# at runtime — never baked into the image. Mirrored in .gitignore.
|
||||
secrets.env
|
||||
secrets.env.*
|
||||
!secrets.env.example
|
||||
/data/
|
||||
/logs/
|
||||
.git/
|
||||
|
||||
@@ -190,3 +190,10 @@ SEARXNG_INSTANCE=http://localhost:8080
|
||||
# These overlays only expose the GPU devices. The slim Odysseus image
|
||||
# still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,
|
||||
# llama-cpp-python, etc.) before models can actually serve on GPU.
|
||||
|
||||
# ============================================================
|
||||
# Storage Paths (Docker Compose)
|
||||
# ============================================================
|
||||
|
||||
# APP_DATA_DIR=./data
|
||||
# APP_LOGS_DIR=./logs
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
# Code owners.
|
||||
#
|
||||
# Every file is owned by the maintainer, so that when branch protection has
|
||||
# "Require review from Code Owners" turned on, no pull request can be merged
|
||||
# without the maintainer's review. This is the human gate that backs up the
|
||||
# automated security checks. See docs/security-ci.md for how to turn it on.
|
||||
|
||||
* @pewdiepie-archdaemon
|
||||
# Intentionally empty for now. The catch-all rule that mapped every path to a
|
||||
# single owner froze all merges the moment "Require review from Code Owners"
|
||||
# was enabled, because no other maintainer's approval could satisfy the gate.
|
||||
# A per-area ownership map (security/auth, CI, frontend, agent internals, with
|
||||
# multiple named owners per line) is being worked out in issue #593; once
|
||||
# agreed it replaces this file. Until then, required reviews and the security
|
||||
# CI gate (docs/security-ci.md) remain in force via branch protection.
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
# Pull Request Review Template
|
||||
|
||||
Use this shape as a copyable reference for substantive PR reviews; GitHub does
|
||||
not auto-apply this file to review comments. Omit sections that do not add
|
||||
useful signal. Lead with confirmed findings; keep speculative notes out of the
|
||||
public review unless they are framed as a concrete open question.
|
||||
|
||||
## Small PR Path
|
||||
|
||||
For narrow docs, typo, test-only, or obvious local fixes, a short review is
|
||||
enough:
|
||||
|
||||
```md
|
||||
LGTM after checking:
|
||||
- scope:
|
||||
- validation:
|
||||
- residual risk:
|
||||
```
|
||||
|
||||
Use the fuller structure below for larger, risky, multi-finding, or
|
||||
security-sensitive reviews.
|
||||
|
||||
## Findings
|
||||
|
||||
**P2 issue (test): Short issue title**
|
||||
|
||||
- **Problem:** Concrete broken flow, contract, input, or risk.
|
||||
|
||||
- **Impact:** Why this matters to users, CI, maintainers, data, security, or scale.
|
||||
|
||||
- **Ask:** Smallest practical correction or decision the author should make.
|
||||
|
||||
- **Location:** `path:line`
|
||||
|
||||
## Open Questions
|
||||
|
||||
- **question (scope, non-blocking): Short author question** Ask the concrete
|
||||
intent, scope, or tradeoff question.
|
||||
|
||||
## Validation
|
||||
|
||||
- Ran:
|
||||
- Not run:
|
||||
- Residual risk:
|
||||
|
||||
## PR Hygiene
|
||||
|
||||
- Target/template/checks:
|
||||
- Related, duplicate, or superseding context:
|
||||
|
||||
## No Findings Variant
|
||||
|
||||
```md
|
||||
## Findings
|
||||
|
||||
none confirmed
|
||||
|
||||
## Validation
|
||||
|
||||
- Ran:
|
||||
- Not run:
|
||||
- Residual risk:
|
||||
```
|
||||
|
||||
## Legend
|
||||
|
||||
- **Findings:** Verified, author-actionable issues that should be fixed or
|
||||
consciously accepted before merge.
|
||||
- **Priority badges:** The shields.io badges below are optional formatting for
|
||||
priority labels. Plain `P0`, `P1`, `P2`, or `P3` text is also acceptable when
|
||||
an external image dependency is undesirable or may not render.
|
||||
- **P0:** `P0` -
|
||||
release-blocking or actively dangerous.
|
||||
- **P1:** `P1` -
|
||||
serious bug, security risk, data-loss risk, or broken primary flow.
|
||||
- **P2:** `P2` -
|
||||
meaningful correctness, test, maintainability, or edge-case issue.
|
||||
- **P3:** `P3` -
|
||||
minor polish or low-risk cleanup.
|
||||
- **Intent labels:**
|
||||
- **`issue`:** A confirmed defect, regression, broken contract, or concrete
|
||||
risk.
|
||||
- **`suggestion`:** A non-blocking improvement that would make the PR clearer,
|
||||
safer, or easier to maintain.
|
||||
- **`nit`:** A tiny, non-blocking cleanup or style note. Use it only when the
|
||||
author can safely ignore it without changing the review outcome.
|
||||
- **`question`:** A real author-facing clarification about intent, scope, or
|
||||
tradeoffs. Do not use questions to hide an issue that should be stated
|
||||
directly.
|
||||
- **`LGTM`:** "Looks good to me." Use only when the review found no blocking
|
||||
issues, or when any remaining notes are clearly optional.
|
||||
- **Decorations:** Optional labels in parentheses that clarify the finding type,
|
||||
scope, or merge impact.
|
||||
- **`security`:** Auth, authorization, ownership, secrets, SSRF, injection,
|
||||
unsafe external input, or other trust-boundary concerns.
|
||||
- **`test`:** Missing, failing, misleading, brittle, or insufficient tests.
|
||||
- **`scope`:** PR scope, feature boundaries, unrelated churn, or work that
|
||||
should be split into a separate issue or PR.
|
||||
- **`ci`:** CI configuration, workflow failures, flaky checks, or validation
|
||||
signal quality.
|
||||
- **`api`:** Route, request/response, public function, schema, persistence, or
|
||||
integration contract changes.
|
||||
- **`docs`:** User-facing docs, contributor docs, examples, or comments that
|
||||
need to change with the code.
|
||||
- **`non-blocking`:** Useful feedback that should not prevent merge by
|
||||
itself.
|
||||
- **Finding fields:**
|
||||
- **Problem:** What is wrong, what contract is ambiguous, or what risk the PR
|
||||
introduces.
|
||||
- **Impact:** Why the problem matters in practical terms.
|
||||
- **Ask:** The smallest concrete fix, test, or decision requested from the PR
|
||||
author.
|
||||
- **Location:** The most useful repo-relative file and line reference for the
|
||||
finding, using `path:line`.
|
||||
- **Optional sections:**
|
||||
- **Open Questions:** Genuine scope or intent questions; omit when there are
|
||||
no real questions.
|
||||
- **Validation:** What the reviewer ran, what was intentionally not run, and
|
||||
what risk remains after review.
|
||||
- **PR Hygiene:** Target-branch, template, CI/check, duplicate, related-work,
|
||||
or superseding-PR notes.
|
||||
- **`none confirmed`:** Use only when no review-worthy findings were confirmed;
|
||||
still list validation gaps or residual risk when relevant.
|
||||
@@ -19,10 +19,10 @@ jobs:
|
||||
name: Python syntax (compileall)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: "3.11"
|
||||
# Byte-compile sources — catches syntax errors without installing deps.
|
||||
@@ -32,10 +32,10 @@ jobs:
|
||||
name: JS syntax (node --check)
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
- uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
- uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0
|
||||
with:
|
||||
node-version: "20"
|
||||
# Syntax-check our own JS (skip vendored libs in static/lib).
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
# ROADMAP "fresh install smoke tests" item; make this required once green.
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
- uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: false
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
echo "docs_only=false" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
- uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
if: steps.docs-check.outputs.docs_only != 'true'
|
||||
with:
|
||||
python-version: "3.11"
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
# CodeQL code scanning
|
||||
#
|
||||
# Purpose: GitHub's own static analysis engine reads the application source
|
||||
# (Python backend + the JavaScript frontend) and looks for real
|
||||
# vulnerabilities -- SQL/command injection, path traversal, auth mistakes,
|
||||
# unsafe deserialization. Findings appear in the repo's Security tab. This is
|
||||
# the deepest check in the suite and the most valuable for a high-profile
|
||||
# target.
|
||||
#
|
||||
# It runs on every push to main and on a weekly schedule (to catch newly
|
||||
# disclosed query patterns against unchanged code). It deliberately does NOT
|
||||
# run on pull requests: most PRs here come from forks, whose read-only token
|
||||
# cannot publish results, which would produce confusing failures. To scan pull
|
||||
# requests too, a maintainer can instead enable CodeQL "default setup" in
|
||||
# Settings -> Security -> Code scanning (one toggle, no file needed) -- see
|
||||
# docs/security-ci.md.
|
||||
|
||||
name: CodeQL
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
schedule:
|
||||
# Weekly, Monday 06:00 UTC.
|
||||
- cron: '0 6 * * 1'
|
||||
workflow_dispatch:
|
||||
|
||||
permissions: {}
|
||||
|
||||
concurrency:
|
||||
group: codeql-${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
analyze:
|
||||
name: Analyze (${{ matrix.language }})
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
security-events: write # publish results to the Security tab
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# Both are interpreted, so CodeQL needs no build step (build-mode none).
|
||||
language: [python, javascript-typescript]
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@03e4368ac7daa2bd82b3e85262f3bf87ee112f57 # v3.36.0
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
build-mode: none
|
||||
|
||||
- name: Perform CodeQL analysis
|
||||
uses: github/codeql-action/analyze@03e4368ac7daa2bd82b3e85262f3bf87ee112f57 # v3.36.0
|
||||
with:
|
||||
category: "/language:${{ matrix.language }}"
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
security-events: write # upload SARIF to the Security tab
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
TRIVY_DB_REPOSITORY: ghcr.io/aquasecurity/trivy-db:2
|
||||
|
||||
- name: Upload Trivy results
|
||||
uses: github/codeql-action/upload-sarif@03e4368ac7daa2bd82b3e85262f3bf87ee112f57 # v3.36.0
|
||||
uses: github/codeql-action/upload-sarif@8aad20d150bbac5944a9f9d289da16a4b0d87c1e # v4.36.2
|
||||
with:
|
||||
sarif_file: trivy-results.sarif
|
||||
category: trivy-image
|
||||
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -55,7 +55,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
# Full history so a secret committed in an earlier commit (and later
|
||||
# deleted) is still caught -- deletion does not remove it from Git.
|
||||
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3
|
||||
with:
|
||||
persist-credentials: false
|
||||
|
||||
|
||||
@@ -14,6 +14,15 @@ venv/
|
||||
.env
|
||||
.env.bak.*
|
||||
!.env.example
|
||||
# Local uv lockfile (optional, per-platform — see "Faster installs with uv" in README)
|
||||
requirements.lock
|
||||
|
||||
# SOPS workflow — encrypted `secrets.env` is intentionally committable,
|
||||
# but every variant (plaintext, manual decrypt copy, editor backup)
|
||||
# must stay out of git. Mirrored in .dockerignore so the same artifacts
|
||||
# also cannot enter image build layers.
|
||||
secrets.env.*
|
||||
!secrets.env.example
|
||||
|
||||
# Data — all user data stays local
|
||||
data/
|
||||
@@ -61,6 +70,9 @@ output.txt.txt
|
||||
*.tiff
|
||||
*.pdf
|
||||
|
||||
# …except shipped static assets
|
||||
!static/icons/*.png
|
||||
|
||||
# …except shipped demo assets in docs/ that the README links to.
|
||||
!docs/*.jpg
|
||||
!docs/*.jpeg
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM python:3.12-slim
|
||||
FROM python:3.14-slim
|
||||
|
||||
# System deps. tmux is required by Cookbook for background downloads/serves.
|
||||
# openssh-client is required for Cookbook remote server tests, setup, probes,
|
||||
|
||||
@@ -1,444 +1,65 @@
|
||||
# Odysseus
|
||||
<p align="center">
|
||||
<img src="docs/odysseus-wordmark.png" alt="Odysseus" width="280">
|
||||
</p>
|
||||
|
||||
> **Branch note:** `dev` is the default branch and contains the latest development changes, but it may be unstable. For the more stable curated branch, use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main).
|
||||
<p align="center">
|
||||
A self-hosted AI workspace for chat, agents, research, documents, email, notes, calendar, and local model workflows.
|
||||
</p>
|
||||
|
||||
```
|
||||
───────────────────────────────────────────────
|
||||
⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ Odysseus vers. 1.0
|
||||
───────────────────────────────────────────────
|
||||
```
|
||||
<p align="center">
|
||||
<a href="#quick-start">Quick Start</a> ·
|
||||
<a href="docs/setup.md">Setup Guide</a> ·
|
||||
<a href="CONTRIBUTING.md">Contributing</a> ·
|
||||
<a href="ROADMAP.md">Roadmap</a>
|
||||
</p>
|
||||
|
||||

|
||||
<p align="center">
|
||||
<a href="https://repology.org/project/odysseus-ai/versions"><img src="https://repology.org/badge/vertical-allrepos/odysseus-ai.svg" alt="Packaging status"></a>
|
||||
</p>
|
||||
|
||||
A self-hosted AI workspace -- meant to be the self-hosted version of the UI experience you get from ChatGPT and Claude, but with more jank and fun. It runs on your own hardware, with your own data -- local-first, privacy-first, and no trojan.
|
||||
<p align="center">
|
||||
<img src="docs/odysseus.jpg" alt="Odysseus interface">
|
||||
</p>
|
||||
|
||||
## Features
|
||||
- **Chat** -- chat with any local model or API; adding them is super simple.<br> <sub>vLLM · llama.cpp · Ollama · OpenRouter · OpenAI · GitHub Copilot</sub>
|
||||
- **Agent** -- hand it tools and let it run the whole task itself.<br> <sub>built on [opencode](https://github.com/anomalyco/opencode) · MCP · web · files · shell · skills · memory</sub>
|
||||
- **Cookbook** -- scans your hardware and recommends models; click to download and serve. Easy!<br> <sub>built on [llmfit](https://github.com/AlexsJones/llmfit) · VRAM-aware · GGUF / FP8 / AWQ · fit scoring · vLLM / llama.cpp serving</sub>
|
||||
- **Deep Research** -- multi-step runs that gather, read, and synthesize sources into a nice visual report.<br> <sub>adapted from [Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)</sub>
|
||||
- **Compare** -- a fun tool to compare models side by side. Test completely blind, no bias!<br> <sub>multi-model · blind test · synthesis</sub>
|
||||
- **Documents** -- YOU write the text, AI is there to assist, not the opposite.<br> <sub>multi-tab editor · markdown · HTML · CSV · syntax highlighting · AI edits · suggestions</sub>
|
||||
- **Memory / Skills** -- Persistent memory and skills, your agent evolves over time as it better understands you and your tasks!<br> <sub>ChromaDB · fastembed (ONNX) · vector + keyword retrieval · import/export</sub>
|
||||
- **Email** -- IMAP/SMTP inbox with AI triage built in: urgency reminders, auto-tag, auto-summary, auto-reply drafts, auto-spam.<br> <sub>IMAP · SMTP · per-account routing · CalDAV-aware</sub>
|
||||
- **Notes & Tasks** -- Quick notes with reminders, a todo list, and scheduled tasks the agent can act on.<br> <sub>note pings · checklist · cron-style tasks · ntfy / browser / email channels</sub>
|
||||
- **Calendar** -- Local-first calendar with CalDAV sync to Radicale / Nextcloud / Apple / Fastmail.<br> <sub>CalDAV pull · .ics import/export · per-calendar colors · agent-aware</sub>
|
||||
- **Works on mobile** -- looks and runs great on your phone, not just desktop.<br> <sub>responsive · installable (PWA) · touch gestures</sub>
|
||||
- **Extras** -- more to explore, happy if you give it a go!<br> <sub>image editor · theme editor · file uploads (vision + PDF) · web search · presets · sessions · 2FA</sub>
|
||||
|
||||
## Demo
|
||||
A full, hover-to-play tour lives on the landing page (`docs/index.html`).
|
||||
|
||||
<details>
|
||||
<summary>Screenshots / clips</summary>
|
||||
|
||||
### Chat & Agents
|
||||

|
||||
### Deep Research
|
||||

|
||||
### Compare
|
||||

|
||||
### Documents
|
||||

|
||||
### Notes & Tasks
|
||||

|
||||
|
||||
</details>
|
||||
---
|
||||
|
||||
## Quick Start
|
||||
|
||||
Defaults work out of the box: clone, run, then configure models/search/email
|
||||
inside **Settings**. Only edit `.env` for deployment-level overrides like
|
||||
`APP_BIND`, `APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password.
|
||||
> `dev` is the default branch and gets the newest changes first. Use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main) if you want the more curated branch.
|
||||
|
||||
On first setup, Odysseus creates an admin account (`admin` unless
|
||||
`ODYSSEUS_ADMIN_USER` is set) and prints a temporary password in the terminal.
|
||||
For Docker installs, the same line is in `docker compose logs odysseus`.
|
||||
Use that for the first login, then change it in **Settings**.
|
||||
|
||||
Contributing? See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and
|
||||
pull request guidelines.
|
||||
|
||||
### Docker (recommended)
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
cp .env.example .env # optional, but recommended for explicit defaults
|
||||
cp .env.example .env
|
||||
docker compose up -d --build
|
||||
```
|
||||
To include optional extras in the image (PDF viewer, Office extraction; includes AGPL PyMuPDF), build with `docker compose build --build-arg INSTALL_OPTIONAL=true` before `up`.
|
||||
|
||||
Open `http://localhost:7000` when the containers are healthy. Docker Compose
|
||||
binds the web UI to `127.0.0.1` by default. If the port is taken, set
|
||||
`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
|
||||
only when you intentionally want LAN/reverse-proxy access.
|
||||
Open `http://localhost:7000` when the containers are healthy. The first admin password is printed in `docker compose logs odysseus`.
|
||||
|
||||
### Native Linux / macOS
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python setup.py
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 7000
|
||||
```
|
||||
Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
|
||||
downloads and serves. The app itself is lightweight; local model serving is the
|
||||
heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
|
||||
connect to API or remote model servers instead. Use `--host 0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
|
||||
Native installs, GPU notes, Windows/macOS instructions, HTTPS, and configuration live in the [setup guide](docs/setup.md).
|
||||
|
||||
### Apple Silicon
|
||||
Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
|
||||
M-series Mac, run Odysseus natively:
|
||||
## Features
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
./start-macos.sh
|
||||
```
|
||||
- **Chat + Agents** — local/API models, tools, MCP, files, shell, skills, and memory.
|
||||
- **Cookbook** — hardware-aware model recommendations, downloads, and serving.
|
||||
- **Deep Research** — multi-step web research with source reading and report generation.
|
||||
- **Compare** — blind side-by-side model testing and synthesis.
|
||||
- **Documents** — writing-first editor with AI edits, suggestions, Markdown, HTML, CSV, and syntax highlighting.
|
||||
- **Email** — IMAP/SMTP inbox with triage, tags, summaries, reminders, and reply drafts.
|
||||
- **Notes, Tasks + Calendar** — reminders, todos, scheduled agent tasks, and CalDAV sync.
|
||||
- **Extras** — gallery/image editor, themes, uploads, web search, presets, sessions, and 2FA.
|
||||
|
||||
It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
|
||||
## Demo
|
||||
|
||||
```bash
|
||||
ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
|
||||
# then open http://<tailscale-ip>:7860
|
||||
```
|
||||
|
||||
The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
|
||||
set there are picked up automatically without a command-line override each run.
|
||||
|
||||
Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not
|
||||
expose this port directly to the public internet. To build a clickable app wrapper:
|
||||
|
||||
```bash
|
||||
./build-macos-app.sh
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Cookbook, GPU, Ollama, and troubleshooting notes</summary>
|
||||
|
||||
**Docker bundled services.** Compose starts Odysseus, ChromaDB, SearXNG, and
|
||||
ntfy. Odysseus and the bundled service ports bind to `127.0.0.1` by default, so
|
||||
they are reachable from the host but not exposed to your LAN/public internet
|
||||
unless you opt in.
|
||||
|
||||
**Cookbook storage in Docker.** Downloads live in `./data/huggingface`
|
||||
(`~/.cache/huggingface` in the container). Cookbook-installed Python CLIs and
|
||||
serve engines live in `./data/local` (`~/.local` in the container), so they
|
||||
survive container recreation.
|
||||
|
||||
**Remote servers.** In **Cookbook -> Settings -> Servers**, generate the
|
||||
Odysseus SSH key and add the public key to the remote server's
|
||||
`~/.ssh/authorized_keys`. From the host you can also run:
|
||||
|
||||
```bash
|
||||
ssh-copy-id -i data/ssh/id_ed25519.pub user@server
|
||||
```
|
||||
|
||||
**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
|
||||
only detect GPUs that Docker exposes to the container — if the host runtime or
|
||||
device passthrough is not configured, Cookbook sees the iGPU, another card, or
|
||||
CPU instead of your intended GPU.
|
||||
|
||||
For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
|
||||
optionally install the host runtime or update `.env`.
|
||||
|
||||
```bash
|
||||
# Read-only diagnostic (default — installs nothing, never edits .env):
|
||||
scripts/check-docker-gpu.sh
|
||||
|
||||
# Print OS-specific install commands without running them:
|
||||
scripts/check-docker-gpu.sh --print-install-commands
|
||||
|
||||
# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
|
||||
scripts/check-docker-gpu.sh --install-nvidia-toolkit
|
||||
|
||||
# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
|
||||
scripts/check-docker-gpu.sh --enable-nvidia-overlay
|
||||
|
||||
# Full assisted setup — install toolkit, then enable overlay if passthrough works:
|
||||
scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
|
||||
```
|
||||
|
||||
Safety notes:
|
||||
- The app never installs host GPU runtime automatically.
|
||||
- The app never edits `.env` automatically.
|
||||
- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
|
||||
and only after GPU passthrough succeeds. `--yes` skips prompts but does not
|
||||
bypass the passthrough gate.
|
||||
- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
|
||||
Git and the Docker build context.
|
||||
|
||||
To enable manually without the script, add this to `.env`:
|
||||
|
||||
```bash
|
||||
COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
|
||||
```
|
||||
|
||||
**AMD / ROCm.** AMD setup is a read-only diagnostic plus a manual `.env` edit. Run:
|
||||
|
||||
```bash
|
||||
scripts/check-docker-amd-gpu.sh
|
||||
```
|
||||
|
||||
Then add the reported values to `.env`, replacing `RENDER_GID` with your host's
|
||||
numeric render group id:
|
||||
|
||||
```bash
|
||||
COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
|
||||
RENDER_GID=989
|
||||
```
|
||||
|
||||
For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`.
|
||||
|
||||
**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools
|
||||
often accept only a single Compose file and do not reliably honor `COMPOSE_FILE`
|
||||
or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE`
|
||||
overlay workflow above. For stack UIs, point the stack at one of the standalone
|
||||
files instead, which bundle the base stack plus the GPU settings:
|
||||
|
||||
- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit
|
||||
on the host.
|
||||
- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the
|
||||
`video`/`render` group membership, and `RENDER_GID` when needed.
|
||||
|
||||
The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the
|
||||
source of truth; the standalone files mirror them for single-file deployments.
|
||||
|
||||
Verify after enabling either overlay:
|
||||
|
||||
```bash
|
||||
docker compose exec odysseus nvidia-smi -L # NVIDIA
|
||||
docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*' # AMD
|
||||
```
|
||||
|
||||
> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
|
||||
> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
|
||||
> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
|
||||
> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
|
||||
> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
|
||||
> not a Docker passthrough failure. Reinstall the serve engine via
|
||||
> **Cookbook → Dependencies** to get a CUDA-enabled build.
|
||||
>
|
||||
> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
|
||||
> the container confirms device passthrough, not ROCm userspace or a
|
||||
> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
|
||||
> inside the slim Odysseus image.
|
||||
|
||||
**Ollama with Docker.** If Ollama runs on the host, add this endpoint in
|
||||
Settings:
|
||||
|
||||
```text
|
||||
http://host.docker.internal:11434/v1
|
||||
```
|
||||
|
||||
Ollama must listen outside its own loopback interface:
|
||||
|
||||
```bash
|
||||
OLLAMA_HOST=0.0.0.0:11434 ollama serve
|
||||
```
|
||||
|
||||
This connects Odysseus in Docker to an Ollama server that is already running on
|
||||
your host machine; it does not start Ollama inside the container.
|
||||
`host.docker.internal` is Docker's hostname for the host machine from inside the
|
||||
container. Cookbook **Serve** is a separate workflow for serving downloaded
|
||||
models through Odysseus/llama.cpp, so Windows users with an existing Ollama
|
||||
install usually only need to add the endpoint in Settings.
|
||||
|
||||
**Useful checks.**
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
docker compose logs --tail=120 odysseus
|
||||
docker compose logs odysseus | grep -E 'ChromaDB|MemoryVectorStore|DEGRADED'
|
||||
```
|
||||
|
||||
**macOS details.** `start-macos.sh` installs Homebrew deps, creates the venv,
|
||||
runs setup, and starts uvicorn on port `7860` because AirPlay often holds
|
||||
`7000`. It uses llama.cpp/Ollama for Metal. vLLM/SGLang are CUDA/ROCm-only and
|
||||
do not run on macOS. MLX-only models are not served by Odysseus.
|
||||
|
||||
</details>
|
||||
|
||||
### Native Windows
|
||||
|
||||
**One-command launcher** (creates the venv, installs deps, runs setup, starts the
|
||||
server; safe to re-run):
|
||||
|
||||
```powershell
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
|
||||
```
|
||||
|
||||
Or do it by hand:
|
||||
|
||||
```powershell
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
py -3.11 -m venv venv
|
||||
venv\Scripts\Activate.ps1
|
||||
pip install -r requirements.txt
|
||||
python setup.py
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 7000
|
||||
```
|
||||
|
||||
If `python` points at an older interpreter, use `py -3.12` (or another installed
|
||||
3.11+ version) for the venv step.
|
||||
|
||||
**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
|
||||
email, calendar, deep research) runs fully native. For full **Cookbook** background
|
||||
model downloads and the agent shell tool, also install
|
||||
[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`).
|
||||
Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows,
|
||||
[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at
|
||||
`http://localhost:11434/v1` in Settings.
|
||||
|
||||
Open `http://localhost:7000`, log in with the generated admin password,
|
||||
and configure everything else inside **Settings**.
|
||||
|
||||
## Troubleshooting & Advanced Setup
|
||||
|
||||
### `chromadb-client` conflicts with embedded ChromaDB
|
||||
If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
|
||||
|
||||
**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
|
||||
```bash
|
||||
./venv/bin/pip uninstall chromadb-client -y
|
||||
./venv/bin/pip install --force-reinstall chromadb
|
||||
```
|
||||
|
||||
### HTTPS + LAN/Tailscale exposure
|
||||
To expose Odysseus on a local network or Tailscale with HTTPS:
|
||||
1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
|
||||
2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert):
|
||||
```bash
|
||||
mkcert -install
|
||||
mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
|
||||
```
|
||||
3. Run `uvicorn` with the generated certs:
|
||||
```bash
|
||||
python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
|
||||
```
|
||||
4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
|
||||
|
||||
### Optional Dependencies
|
||||
`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
|
||||
|
||||
| Package | Feature unlocked |
|
||||
|---------|-----------------|
|
||||
| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
|
||||
| `ddgs` | DuckDuckGo as a search provider option. |
|
||||
| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
|
||||
| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
|
||||
|
||||
### Outlook / Office 365 email
|
||||
Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook
|
||||
and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox
|
||||
passwords will fail. See [docs/email-outlook.md](docs/email-outlook.md) for the
|
||||
current limitation and the planned integration direction.
|
||||
|
||||
## Security Notes
|
||||
Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.
|
||||
|
||||
- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
|
||||
- Keep `LOCALHOST_BYPASS=false` outside local development.
|
||||
- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
|
||||
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
|
||||
- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
|
||||
- Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
|
||||
- Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
|
||||
- Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
|
||||
- If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
|
||||
- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
|
||||
- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
|
||||
- Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.
|
||||
|
||||
### Private or proxied deployments
|
||||
Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:
|
||||
|
||||
1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
|
||||
2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
|
||||
3. Put the authenticated Odysseus web/API entrypoint behind that layer.
|
||||
4. Keep raw service and model ports internal-only.
|
||||
|
||||
Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
|
||||
|
||||
Common internal-only ports from the default docs/compose setup:
|
||||
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| `7000` | Odysseus raw app port |
|
||||
| `8080` | SearXNG |
|
||||
| `8091` | ntfy |
|
||||
| `8100` | ChromaDB host port for manual/compose access |
|
||||
| `11434` | Ollama |
|
||||
| `8000-8020` | Common local model/provider APIs |
|
||||
A full hover-to-play tour lives on the landing page: [`docs/index.html`](docs/index.html).
|
||||
|
||||
## Contributing
|
||||
Help is welcome. The best entry points are fresh-install testing, provider setup
|
||||
bugs, mobile/editor polish, docs, and small focused refactors. See
|
||||
[ROADMAP.md](ROADMAP.md) for the current help-wanted list.
|
||||
|
||||
## Configuration
|
||||
Most setup is done inside the app with `/setup` or **Settings**. Use `.env`
|
||||
for deployment-level defaults and secrets you want present before first boot.
|
||||
Key settings:
|
||||
Help is welcome. The best entry points are fresh-install testing, provider setup bugs, mobile/editor polish, docs, and small focused refactors. See [CONTRIBUTING.md](CONTRIBUTING.md) and [ROADMAP.md](ROADMAP.md).
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `LLM_HOST` | `localhost` | Your LLM server (e.g. `llm-host.local:8000`) |
|
||||
| `LLM_HOSTS` | -- | Comma-separated list for model discovery |
|
||||
| `OPENAI_API_KEY` | -- | Optional OpenAI key. Prefer adding providers in the app unless pre-seeding. |
|
||||
| `SEARXNG_INSTANCE` | `http://localhost:8080` | SearXNG URL. Docker overrides this to `http://searxng:8080`. |
|
||||
| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
|
||||
| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
|
||||
| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
|
||||
| `AUTH_ENABLED` | `true` | Enable/disable login |
|
||||
| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
|
||||
| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
|
||||
| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
|
||||
| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
|
||||
| `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |
|
||||
| `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint |
|
||||
| `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. |
|
||||
| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). |
|
||||
| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). |
|
||||
| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). |
|
||||
## Security
|
||||
|
||||
All upload-limit variables are optional. When one is set it must be a positive integer; an invalid value fails fast at startup.
|
||||
|
||||
### Built-in MCP servers (optional setup)
|
||||
|
||||
Odysseus auto-registers a few built-in MCP servers at startup. The npx-based ones (currently the browser server, `@playwright/mcp`) only start when their npm package is already in the local npx cache. If a package isn't cached, that server is skipped with a startup log message explaining what to do, so a fresh install does not block on a multi-minute npm download or hang if Playwright system deps are missing.
|
||||
|
||||
To enable the browser MCP (page navigation, screenshots, vision), run once:
|
||||
|
||||
```bash
|
||||
npx -y @playwright/mcp@latest --version
|
||||
```
|
||||
|
||||
That installs `@playwright/mcp` plus Playwright (~300MB total). Restart Odysseus and the server will register at startup.
|
||||
|
||||
## Architecture
|
||||
```
|
||||
app.py # FastAPI entry point
|
||||
core/ auth, database, middleware, constants
|
||||
src/ llm_core, agent_loop, agent_tools, chat_processor, search/
|
||||
routes/ chat, session, document, memory, model … endpoints
|
||||
services/ docs, memory, search, hwfit (Cookbook) …
|
||||
static/ index.html + app.js + style.css + js/ (modular front-end)
|
||||
docs/ landing page (index.html) + preview clips
|
||||
```
|
||||
|
||||
## Data
|
||||
All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
|
||||
`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
|
||||
Odysseus is a self-hosted workspace with powerful local tools. Keep auth enabled, keep private data out of Git, and do not expose raw model/service ports publicly. Deployment details are in the [setup guide](docs/setup.md#security-notes).
|
||||
|
||||
## Star History
|
||||
|
||||
@@ -451,19 +72,5 @@ All user data lives in `data/` (gitignored): `app.db` (sessions, messages, docum
|
||||
</a>
|
||||
|
||||
## License
|
||||
AGPL-3.0-or-later -- see [LICENSE](LICENSE) and [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md).
|
||||
|
||||
```
|
||||
|
|
||||
|||
|
||||
|||||
|
||||
| | | |||||||
|
||||
)_) )_) )_) ~|~
|
||||
)___))___))___)\ |
|
||||
)____)____)_____)\\|
|
||||
_____|____|____|_____\\\__
|
||||
\ /
|
||||
~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~
|
||||
~^~ all aboard! ~^~
|
||||
~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~~^~^~
|
||||
```
|
||||
AGPL-3.0-or-later -- see [LICENSE](LICENSE) and [ACKNOWLEDGMENTS.md](ACKNOWLEDGMENTS.md).
|
||||
|
||||
@@ -69,10 +69,37 @@ from src.generated_images import GENERATED_IMAGE_HEADERS, resolve_generated_imag
|
||||
from starlette.responses import RedirectResponse
|
||||
|
||||
# ========= LOGGING =========
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
)
|
||||
import logging.handlers
|
||||
from core.constants import DATA_DIR
|
||||
|
||||
_root_logger = logging.getLogger()
|
||||
_root_logger.setLevel(logging.INFO)
|
||||
_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
|
||||
# Clear existing handlers to avoid duplicates
|
||||
for _h in list(_root_logger.handlers):
|
||||
_root_logger.removeHandler(_h)
|
||||
|
||||
_console_h = logging.StreamHandler()
|
||||
_console_h.setFormatter(_formatter)
|
||||
_root_logger.addHandler(_console_h)
|
||||
|
||||
try:
|
||||
_log_dir = os.path.join(DATA_DIR, "logs")
|
||||
os.makedirs(_log_dir, exist_ok=True)
|
||||
_log_file = os.path.join(_log_dir, "app.log")
|
||||
|
||||
# RotatingFileHandler is not multi-process safe (e.g. if uvicorn is run with --workers N).
|
||||
# Odysseus is single-process by convention, so this is acceptable, but be aware that
|
||||
# concurrent log rotation issues can arise if multiple workers are configured.
|
||||
_file_h = logging.handlers.RotatingFileHandler(
|
||||
_log_file, maxBytes=5 * 1024 * 1024, backupCount=3, encoding="utf-8"
|
||||
)
|
||||
_file_h.setFormatter(_formatter)
|
||||
_root_logger.addHandler(_file_h)
|
||||
except Exception as e:
|
||||
_root_logger.warning(f"Failed to initialize file logging handler (falling back to console-only): {e}")
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ========= APP =========
|
||||
@@ -140,6 +167,7 @@ _TIMEOUT_EXEMPT_PREFIXES = (
|
||||
"/api/cookbook/setup", # remote pacman/apt installs
|
||||
"/api/upload", # large files
|
||||
"/api/image", # diffusion proxies (inpaint/harmonize/upscale/etc.) — own 120s httpx timeout
|
||||
"/api/memory/audit", # retains own 120s LLM inactivity timeout
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ Authentication module — multi-user password hashing, session tokens, config pe
|
||||
Config stored in data/auth.json. Uses bcrypt directly.
|
||||
"""
|
||||
|
||||
import enum
|
||||
import json
|
||||
import os
|
||||
import secrets
|
||||
@@ -83,6 +84,15 @@ def _verify_password(password: str, hashed: str) -> bool:
|
||||
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))
|
||||
|
||||
|
||||
class SetAdminResult(enum.Enum):
    """Result codes returned by AuthManager.set_admin.

    Each member names one distinct outcome, so a caller can pick a precise
    response per case rather than interpreting a plain bool.
    """

    # The change was applied, or the user was already in the requested state.
    OK = "ok"

    # No user is stored under the target username.
    USER_NOT_FOUND = "user_not_found"

    # The requester is not an admin.
    NOT_AUTHORIZED = "not_authorized"

    # The change would remove the last remaining admin.
    LAST_ADMIN = "last_admin"
|
||||
|
||||
|
||||
class AuthManager:
|
||||
"""Manages multi-user password + session-token auth system."""
|
||||
|
||||
@@ -387,6 +397,69 @@ class AuthManager:
|
||||
logger.info(f"Updated privileges for '{username}': {current}")
|
||||
return True
|
||||
|
||||
def set_admin(self, username: str, is_admin: bool,
              requesting_user: str) -> SetAdminResult:
    """Promote/demote an existing user to/from admin. Admin only.

    Refuses to remove the last remaining admin so the instance can never
    be locked out of admin access; self-demotion is allowed as long as
    another admin remains. Admin status is re-checked live on every
    request, so unlike delete/rename no session or token revocation is
    needed — a demoted admin simply fails the next is_admin() gate.

    Promotion stashes the user's current privilege map and demotion
    restores it, so a temporary admin stint can't silently broaden a
    user's non-admin access; users without a stash (created as admin,
    or promoted before stashing existed) demote to DEFAULT_PRIVILEGES.

    Counting admins and flipping the flag happen in one critical section
    so two concurrent demotions can't race the admin count to zero.

    Args:
        username: Target account name; None is treated as "", and the value
            is stripped and lower-cased before lookup.
        is_admin: Desired admin state; coerced with bool().
        requesting_user: Account asking for the change; normalised the same
            way as ``username`` and must itself carry a truthy ``is_admin``.

    Returns:
        The checks run in this order: USER_NOT_FOUND when the target is not
        stored, NOT_AUTHORIZED when the requester is not an admin, OK when
        the target is already in the requested state (nothing is written),
        LAST_ADMIN when a demotion would leave at most zero admins, and
        finally OK after the change has been written and saved.
    """
    # Normalise inputs so lookups are case- and whitespace-insensitive.
    username = (username or "").strip().lower()
    requesting_user = (requesting_user or "").strip().lower()
    is_admin = bool(is_admin)
    with self._config_lock:
        target = self._config.get("users", {}).get(username)
        if target is None:
            return SetAdminResult.USER_NOT_FOUND
        if not self.users.get(requesting_user, {}).get("is_admin"):
            return SetAdminResult.NOT_AUTHORIZED
        currently_admin = bool(target.get("is_admin"))
        if currently_admin == is_admin:
            return SetAdminResult.OK  # no-op; leave privileges untouched
        if currently_admin and not is_admin:
            # Demotion: count admins (the target included) before touching
            # anything, so the last one can never be removed.
            admin_count = sum(1 for d in self.users.values() if d.get("is_admin"))
            if admin_count <= 1:
                return SetAdminResult.LAST_ADMIN
        # Write order matters for lock-free readers: get_privileges()
        # reads without _config_lock and trusts is_admin, so the admin
        # flag must be flipped while the stored map is safe to expose —
        # before writing admin privileges on promote, after restoring
        # the pre-admin map on demote.
        if is_admin:
            target["is_admin"] = True
            # Stash the pre-admin map so a later demotion can restore it.
            # While is_admin is set the stored map is inert: get_privileges
            # short-circuits to ADMIN_PRIVILEGES and set_privileges refuses
            # admins, so only set_admin ever touches the stash.
            target["privileges_before_admin"] = dict(
                target.get("privileges") or DEFAULT_PRIVILEGES
            )
            target["privileges"] = dict(ADMIN_PRIVILEGES)
        else:
            # Restore the stashed pre-admin map. Fall back to defaults for
            # users created as admins (their stored map is ADMIN_PRIVILEGES,
            # which must not leak past demotion — e.g. can_use_bash) and
            # for admins promoted before the stash existed.
            target["privileges"] = dict(
                target.pop("privileges_before_admin", None)
                or DEFAULT_PRIVILEGES
            )
            target["is_admin"] = False
        self._save()
        logger.info("Set is_admin=%s for '%s' (by '%s')", is_admin, username, requesting_user)
        return SetAdminResult.OK
|
||||
|
||||
def change_password(self, username: str, current_password: str, new_password: str) -> bool:
|
||||
username = username.strip().lower()
|
||||
if username not in self.users:
|
||||
|
||||
@@ -324,6 +324,13 @@ class EmailAccount(TimestampMixin, Base):
|
||||
smtp_password = Column(String, default="")
|
||||
|
||||
from_address = Column(String, default="")
|
||||
display_name = Column(String, nullable=True) # "Hriday Ranka" — used in From: header
|
||||
|
||||
# OAuth2 (Google / Google Workspace). Tokens stored encrypted via secret_storage.
|
||||
oauth_provider = Column(String, nullable=True) # "google" or None
|
||||
oauth_access_token = Column(String, nullable=True) # encrypted
|
||||
oauth_refresh_token = Column(String, nullable=True) # encrypted
|
||||
oauth_token_expiry = Column(String, nullable=True) # unix timestamp string
|
||||
|
||||
__table_args__ = (
|
||||
Index('ix_email_accounts_owner_default', 'owner', 'is_default'),
|
||||
@@ -1427,6 +1434,25 @@ def _migrate_add_task_automation_columns():
|
||||
except Exception as e:
|
||||
logging.getLogger(__name__).warning(f"task automation migration: {e}")
|
||||
|
||||
def _migrate_add_email_oauth_columns():
    """Ensure email_accounts has the Google OAuth and display_name columns.

    Reads the table's current column names and issues one ALTER TABLE per
    expected column that is absent. Any failure is logged as a warning
    instead of being raised.
    """
    wanted = (
        ("oauth_provider", "TEXT"),
        ("oauth_access_token", "TEXT"),
        ("oauth_refresh_token", "TEXT"),
        ("oauth_token_expiry", "TEXT"),
        ("display_name", "TEXT"),
    )
    try:
        with engine.connect() as db:
            table_info = db.execute(text("PRAGMA table_info(email_accounts)"))
            # Index 1 of each PRAGMA table_info row is the column name.
            present = [row[1] for row in table_info]
            missing = [pair for pair in wanted if pair[0] not in present]
            for name, sql_type in missing:
                db.execute(text(f"ALTER TABLE email_accounts ADD COLUMN {name} {sql_type}"))
            db.commit()
    except Exception as exc:
        logging.getLogger(__name__).warning(f"email oauth columns migration: {exc}")
|
||||
|
||||
|
||||
def _migrate_add_oauth_config():
|
||||
"""Add oauth_config column to mcp_servers table if missing."""
|
||||
try:
|
||||
@@ -1602,6 +1628,7 @@ class CalendarCal(TimestampMixin, Base):
|
||||
# NULL for local calendars and for CalDAV calendars created before
|
||||
# multi-account support was added (treated as "use any configured account").
|
||||
account_id = Column(String, nullable=True, index=True)
|
||||
caldav_base_url = Column(String, nullable=True)
|
||||
|
||||
events = relationship("CalendarEvent", back_populates="calendar", cascade="all, delete-orphan")
|
||||
|
||||
@@ -1632,10 +1659,27 @@ class CalendarEvent(TimestampMixin, Base):
|
||||
# vanishes upstream). NULL/local = created locally (agent, email triage, or
|
||||
# a UI event whose write-back failed) and must NOT be pruned by the sync.
|
||||
origin = Column(String, nullable=True, index=True)
|
||||
remote_href = Column(String, nullable=True) # CalDAV object URL for updates/deletes
|
||||
remote_etag = Column(String, nullable=True) # Last seen CalDAV ETag, when available
|
||||
caldav_sync_pending = Column(String, nullable=True) # create | update | delete retry marker
|
||||
|
||||
calendar = relationship("CalendarCal", back_populates="events")
|
||||
|
||||
|
||||
class CalendarDeletedEvent(TimestampMixin, Base):
    """Hidden CalDAV delete tombstone retained until remote delete succeeds.

    Rows live in the ``caldav_deleted_events`` table. ``uid`` is the primary
    key; every other column declared here is nullable.
    """
    __tablename__ = "caldav_deleted_events"

    # Primary key; also declared with index=True.
    uid = Column(String, primary_key=True, index=True)
    # Indexed, nullable lookup columns.
    owner = Column(String, nullable=True, index=True)
    calendar_id = Column(String, nullable=True, index=True)
    # NOTE(review): presumably these carry the same remote metadata as the
    # identically named CalendarEvent / CalendarCal columns (CalDAV object URL,
    # last seen ETag, server base URL) — confirm against the sync code.
    remote_href = Column(String, nullable=True)
    remote_etag = Column(String, nullable=True)
    caldav_base_url = Column(String, nullable=True)
    summary = Column(String, nullable=True)
    # Free-form text column; presumably the most recent remote-delete
    # failure — confirm against the code that writes it.
    last_error = Column(Text, nullable=True)
|
||||
|
||||
|
||||
class Integration(TimestampMixin, Base):
|
||||
"""An external service connection (email, RSS, webhook, etc.)."""
|
||||
__tablename__ = "integrations"
|
||||
@@ -1753,6 +1797,7 @@ def init_db():
|
||||
_migrate_add_tidy_verdict()
|
||||
_migrate_add_doc_source_email_cols()
|
||||
_migrate_add_oauth_config()
|
||||
_migrate_add_email_oauth_columns()
|
||||
_migrate_add_task_automation_columns()
|
||||
_migrate_add_disabled_tools()
|
||||
_migrate_add_mcp_oauth_tokens_column()
|
||||
@@ -1767,6 +1812,7 @@ def init_db():
|
||||
_migrate_add_calendar_is_utc()
|
||||
_migrate_add_calendar_origin()
|
||||
_migrate_add_calendar_account_id()
|
||||
_migrate_add_caldav_sync_columns()
|
||||
_migrate_chat_messages_fts()
|
||||
_migrate_encrypt_email_passwords()
|
||||
_migrate_encrypt_signatures()
|
||||
@@ -2067,6 +2113,31 @@ def _migrate_add_calendar_account_id():
|
||||
pass
|
||||
|
||||
|
||||
def _migrate_add_caldav_sync_columns():
    """Add remote CalDAV metadata used for bidirectional sync.

    Adds ``remote_href``, ``remote_etag`` and ``caldav_sync_pending`` to
    ``calendar_events`` and ``caldav_base_url`` to ``calendars`` when they are
    missing. A table that does not exist yet (empty ``PRAGMA table_info``) is
    skipped, and nothing happens when the SQLite file itself is absent.
    Failures are logged as a warning rather than raised.
    """
    import sqlite3
    from contextlib import closing

    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return

    # (table, columns to ensure) — every added column is TEXT.
    wanted = (
        ("calendar_events", ("remote_href", "remote_etag", "caldav_sync_pending")),
        ("calendars", ("caldav_base_url",)),
    )
    try:
        # closing() releases the handle even when a PRAGMA/ALTER raises; the
        # previous straight-line conn.close() was skipped on any error.
        with closing(sqlite3.connect(db_path)) as conn:
            for table, columns in wanted:
                existing = [
                    row[1]
                    for row in conn.execute(f"PRAGMA table_info({table})").fetchall()
                ]
                if not existing:
                    continue  # table not created yet; nothing to alter
                for column in columns:
                    if column not in existing:
                        conn.execute(f"ALTER TABLE {table} ADD COLUMN {column} TEXT")
            conn.commit()
    except Exception as e:
        logging.getLogger(__name__).warning(f"CalDAV sync metadata migration failed: {e}")
|
||||
|
||||
|
||||
def _migrate_add_calendar_metadata():
|
||||
"""Add importance/event_type/last_pinged columns to calendar_events table."""
|
||||
import sqlite3
|
||||
|
||||
@@ -16,18 +16,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
@@ -15,18 +15,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
@@ -4,18 +4,18 @@ services:
|
||||
ports:
|
||||
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
|
||||
volumes:
|
||||
- ./data:/app/data:z
|
||||
- ./logs:/app/logs:z
|
||||
- ${APP_DATA_DIR:-./data}:/app/data:z
|
||||
- ${APP_LOGS_DIR:-./logs}:/app/logs:z
|
||||
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
|
||||
# add the shown public key to each remote server's authorized_keys.
|
||||
- ./data/ssh:/app/.ssh:z
|
||||
- ${APP_DATA_DIR:-./data}/ssh:/app/.ssh:z
|
||||
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
|
||||
# container, so persist its HuggingFace cache under ${APP_DATA_DIR:-./data}/huggingface.
|
||||
- ./data/huggingface:/app/.cache/huggingface:z
|
||||
- ${APP_DATA_DIR:-./data}/huggingface:/app/.cache/huggingface:z
|
||||
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
|
||||
# land under /app/.local for the odysseus user. Persist them so a
|
||||
# container recreate does not silently remove installed serve engines.
|
||||
- ./data/local:/app/.local:z
|
||||
- ${APP_DATA_DIR:-./data}/local:/app/.local:z
|
||||
extra_hosts:
|
||||
# Lets the container reach local services on the Docker host, including
|
||||
# Ollama at http://host.docker.internal:11434.
|
||||
|
||||
@@ -0,0 +1,194 @@
|
||||
# Agent migration manifests
|
||||
|
||||
Odysseus should be able to learn from another agent without blindly trusting
|
||||
that agent's whole state. The safe migration path is:
|
||||
|
||||
```text
|
||||
source agent export -> source adapter -> agent-migration.v1 manifest -> preview -> apply
|
||||
```
|
||||
|
||||
The manifest is intentionally source-neutral. OpenClaw, Hermes, a folder of
|
||||
Markdown notes, or any other agent can have its own adapter, but Odysseus only
|
||||
needs to understand the normalized manifest.
|
||||
|
||||
## Why not import everything as memory?
|
||||
|
||||
Durable memory should stay compact and useful. Long notes, logs, session
|
||||
transcripts, and project archives are useful context, but they are not all
|
||||
memories. A good migration keeps two layers separate:
|
||||
|
||||
- **Archive documents** preserve source material for search, reading, and later
|
||||
extraction.
|
||||
- **Memory candidates** are short facts or preferences that can be reviewed
|
||||
before being saved into Odysseus memory.
|
||||
|
||||
This keeps Odysseus' existing memory-review flow intact while giving it better
|
||||
source material to review.
|
||||
|
||||
## Manifest shape
|
||||
|
||||
`agent-migration.v1` is a JSON object:
|
||||
|
||||
```json
|
||||
{
|
||||
"schema_version": "agent-migration.v1",
|
||||
"generated_at": "2026-06-06T00:00:00Z",
|
||||
"source": {
|
||||
"name": "example-agent",
|
||||
"kind": "generic"
|
||||
},
|
||||
"summary": {
|
||||
"item_count": 4,
|
||||
"counts_by_kind": {
|
||||
"memory": 1,
|
||||
"skill": 1,
|
||||
"conversation_thread": 1,
|
||||
"archive_document": 1
|
||||
},
|
||||
"warning_count": 0
|
||||
},
|
||||
"items": [],
|
||||
"warnings": []
|
||||
}
|
||||
```
|
||||
|
||||
Each item has a stable `id`, a `kind`, source metadata, and enough content for a
|
||||
future importer to preview it before applying.
|
||||
|
||||
Supported item kinds in the first pass:
|
||||
|
||||
- `memory` — a candidate memory with `text`, `category`, `source`, and
|
||||
provenance metadata.
|
||||
- `skill` — a `SKILL.md` file with content and parsed frontmatter metadata.
|
||||
- `conversation_thread` — a normalized transcript thread from an exported chat
|
||||
history. Message content is optional; adapters can preserve only thread
|
||||
metadata, message counts, timestamps, and hashes when a manifest should stay
|
||||
small or avoid embedding private transcript text.
|
||||
- `archive_document` — long-form source material. Content is optional; adapters
|
||||
can preserve only path/hash/size metadata when a manifest should stay small.
|
||||
|
||||
## Build a manifest
|
||||
|
||||
Use the read-only helper:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name old-agent \
|
||||
--source-kind generic \
|
||||
--memory-json /path/to/memories.json \
|
||||
--skills-dir /path/to/skills \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--archive /path/to/notes \
|
||||
--output /tmp/agent-migration.json
|
||||
```
|
||||
|
||||
The helper does not write to `data/`, call an LLM, import Odysseus modules, or
|
||||
modify the source. It only writes JSON.
|
||||
|
||||
Memory JSON may be:
|
||||
|
||||
```json
|
||||
[
|
||||
"A plain memory string",
|
||||
{
|
||||
"text": "A categorized memory",
|
||||
"category": "preference",
|
||||
"source": "old-agent"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
or an object containing a list under `memories`, `memory`, `items`, or `data`.
|
||||
|
||||
Skills are scanned recursively for `SKILL.md`:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name hermes \
|
||||
--source-kind hermes \
|
||||
--skills-dir ~/.hermes/skills \
|
||||
--output /tmp/hermes-skills-manifest.json
|
||||
```
|
||||
|
||||
Archive documents are metadata-only by default. To embed text content:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name notes-export \
|
||||
--archive /path/to/markdown-notes \
|
||||
--include-archive-content \
|
||||
--output /tmp/notes-manifest.json
|
||||
```
|
||||
|
||||
Conversation exports are also metadata-only by default:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name chatgpt-export \
|
||||
--source-kind chatgpt \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--output /tmp/chatgpt-conversations-manifest.json
|
||||
```
|
||||
|
||||
The first pass supports generic conversation JSON such as:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "thread-1",
|
||||
"title": "Project plan",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Can we design this?"},
|
||||
{"role": "assistant", "content": "Yes, start with a narrow slice."}
|
||||
]
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
It also recognizes ChatGPT-style `mapping` exports from `conversations.json`.
|
||||
To embed normalized messages:
|
||||
|
||||
```bash
|
||||
python3 scripts/agent_migration_manifest.py \
|
||||
--source-name chatgpt-export \
|
||||
--source-kind chatgpt \
|
||||
--conversation-json /path/to/conversations.json \
|
||||
--include-conversation-content \
|
||||
--max-conversation-messages 2000 \
|
||||
--output /tmp/chatgpt-conversations-with-content.json
|
||||
```
|
||||
|
||||
Content embedding is explicit because exported chat histories can be huge and
|
||||
private. A future source-specific adapter can add ZIP traversal, attachment
|
||||
metadata, and provider-specific project/workspace fields while still emitting
|
||||
the same `conversation_thread` manifest item.
|
||||
|
||||
## Recommended apply behavior
|
||||
|
||||
A future Odysseus importer should treat the manifest as untrusted user-provided
|
||||
data and apply it in stages:
|
||||
|
||||
1. Show a dry-run summary with counts, warnings, duplicates, and sample items.
|
||||
2. Back up current `data/` state before writing anything.
|
||||
3. Import archive documents as documents or another searchable source, not as
|
||||
memory.
|
||||
4. Import conversation threads as searchable archived context first, with
|
||||
citations back to the source thread. Do not turn whole transcripts into
|
||||
memory.
|
||||
5. Show memory candidates for review before saving through the normal memory
|
||||
path.
|
||||
6. Import skills only after name/category conflict checks.
|
||||
7. Skip secrets by default. Credentials need explicit, provider-specific flows.
|
||||
|
||||
## What belongs in source adapters?
|
||||
|
||||
Adapters can be source-specific. The core manifest should not be.
|
||||
|
||||
For example, an OpenClaw adapter may know about OpenClaw's workspace files. A
|
||||
Hermes adapter may know about `~/.hermes/config.yaml` and `~/.hermes/skills`.
|
||||
A ChatGPT adapter may know about `conversations.json`, uploaded-file metadata,
|
||||
and image attachment directories. A Claude adapter may know about Claude's
|
||||
export shape and project boundaries. A generic adapter may only know about
|
||||
memory JSON, conversation JSON, `SKILL.md`, and Markdown folders.
|
||||
|
||||
Nonstandard folders should be adapter details, not required Odysseus concepts.
|
||||
@@ -0,0 +1,129 @@
|
||||
# Backup & Restore
|
||||
|
||||
Odysseus keeps all of your state in the `data/` directory — the SQLite database
|
||||
(`app.db`), the Fernet encryption key (`data/.app_key`), the vault, memory, RAG
|
||||
indexes, personal documents, and uploads. The `scripts/odysseus-backup` tool
|
||||
snapshots that directory into a single gzip tarball and restores it later.
|
||||
|
||||
Snapshots are safe to take while the app is running: SQLite databases are copied
|
||||
through SQLite's own `.backup` API rather than a raw file copy, so an in-flight
|
||||
write can't corrupt the snapshot.
|
||||
|
||||
> **A snapshot contains your secrets.** The tarball includes the Fernet
|
||||
> encryption key (`data/.app_key`), the vault, sessions, and any stored
|
||||
> provider/API tokens — so treat it like a password. Store backups somewhere
|
||||
> private, never commit them to Git, and prefer an encrypted destination when
|
||||
> copying them offsite.
|
||||
|
||||
## Quick start
|
||||
|
||||
Run the tool from the repository root:
|
||||
|
||||
```bash
|
||||
# Create a snapshot → backups/odysseus-backup-<YYYYMMDD-HHMMSS>.tar.gz
|
||||
./scripts/odysseus-backup snapshot
|
||||
|
||||
# List existing snapshots (most recent first)
|
||||
./scripts/odysseus-backup list
|
||||
|
||||
# Check a tarball's integrity without extracting it
|
||||
./scripts/odysseus-backup verify backups/odysseus-backup-20260101-120000.tar.gz
|
||||
|
||||
# Restore (destructive — see the warning below)
|
||||
./scripts/odysseus-backup restore backups/odysseus-backup-20260101-120000.tar.gz --yes
|
||||
```
|
||||
|
||||
The script depends only on the Python standard library, so any `python3` on your
|
||||
`PATH` will run it — you don't need the app's virtualenv active.
|
||||
|
||||
Every command prints a JSON result. Add `--pretty` for indented output.
|
||||
|
||||
## Commands
|
||||
|
||||
### `snapshot`
|
||||
|
||||
Writes a `tar.gz` of `data/` to `backups/odysseus-backup-<YYYYMMDD-HHMMSS>.tar.gz`.
|
||||
|
||||
| Flag | Effect |
|
||||
| --- | --- |
|
||||
| `--out PATH` | Write to a specific path instead of the default `backups/` location. Must be **outside** `data/`. |
|
||||
| `--include-research` | Include `data/deep_research/` (skipped by default — research runs are large). |
|
||||
| `--include-attachments` | Include `data/mail-attachments/` (skipped by default — cached IMAP extractions, re-derivable). |
|
||||
|
||||
By default the snapshot includes everything under `data/` **except**
|
||||
`deep_research/` and `mail-attachments/`. Personal uploads and documents are
|
||||
included.
|
||||
|
||||
```bash
|
||||
# Snapshot straight to a mounted NAS path
|
||||
./scripts/odysseus-backup snapshot --out /mnt/nas/odysseus-$(date +%F).tar.gz
|
||||
|
||||
# Full snapshot including research runs and mail attachments
|
||||
./scripts/odysseus-backup snapshot --include-research --include-attachments
|
||||
```
|
||||
|
||||
### `list`
|
||||
|
||||
Lists the tarballs in `backups/`, most recent first, with size and modification
|
||||
time.
|
||||
|
||||
### `verify PATH`
|
||||
|
||||
Opens the tarball read-only and walks every member to confirm it is intact and
|
||||
safe to restore. Nothing is extracted. Use this before relying on an old backup
|
||||
or after copying one across machines.
|
||||
|
||||
### `restore PATH --yes`
|
||||
|
||||
Overwrites `data/` from a tarball.
|
||||
|
||||
> **Restore is destructive.** It replaces the current `data/` directory. `--yes`
|
||||
> is required so a mistyped command can't wipe your live state.
|
||||
|
||||
Restore is not a blind delete: before extracting, the tool **renames your current
|
||||
`data/` to `data.before-restore-<timestamp>`** in the repository root. If a
|
||||
restore turns out to be wrong, your previous state is still there — delete the
|
||||
restored `data/` and rename the stashed directory back. The tarball is also
|
||||
validated entry-by-entry: archives containing absolute paths, `..` segments,
|
||||
symlinks, or anything outside `data/` are rejected.
|
||||
|
||||
## Scheduling offsite backups
|
||||
|
||||
The tarball output composes cleanly with cron and any copy tool. For example, a
|
||||
nightly snapshot copied offsite:
|
||||
|
||||
```cron
|
||||
0 3 * * * cd /path/to/odysseus && ./scripts/odysseus-backup snapshot --out "/mnt/nas/odysseus-$(date +\%F).tar.gz"
|
||||
```
|
||||
|
||||
To push the snapshot to remote storage instead, follow the snapshot command
|
||||
with `scp`, `rclone`, `s3cmd`, or a similar copy tool.
|
||||
|
||||
## Docker vs native installs
|
||||
|
||||
The tool reads `data/` and writes `backups/` relative to the repository root, so
|
||||
where you run it matters:
|
||||
|
||||
- **Native installs** — run it from the repo root as shown above. `data/` and
|
||||
`backups/` are both in the repo directory.
|
||||
- **Docker** — `docker-compose.yml` bind-mounts the host's `./data` to
|
||||
`/app/data`, so the live data is also present on the host. **Run the tool on
|
||||
the host** from the repo root; the snapshot reads the bind-mounted `./data` and
|
||||
writes to `./backups` on the host. Running it *inside* the container is not
|
||||
recommended, because `backups/` is not a mounted volume and the tarball would
|
||||
be lost when the container is recreated.
|
||||
|
||||
> **ChromaDB caveat (Docker only).** In the Docker setup, ChromaDB stores its
|
||||
> vectors in a separate Compose-managed volume (declared as `chromadb-data`),
|
||||
> **not** under `./data`. `odysseus-backup` therefore does not capture the Docker
|
||||
> ChromaDB store. Back it up separately if you need it. Compose prefixes the
|
||||
> volume with the project name, so find the real name first
|
||||
> (`docker volume ls | grep chromadb`), then archive it — for example:
|
||||
>
|
||||
> ```bash
|
||||
> docker run --rm -v <project>_chromadb-data:/data -v "$PWD":/backup \
|
||||
> alpine tar czf /backup/chromadb.tar.gz -C /data .
|
||||
> ```
|
||||
>
|
||||
> On native installs ChromaDB lives at `data/chroma/` and is included in the
|
||||
> snapshot normally.
|
||||
|
Before Width: | Height: | Size: 3.0 MiB |
|
Before Width: | Height: | Size: 3.4 MiB |
|
Before Width: | Height: | Size: 1.1 MiB |
|
Before Width: | Height: | Size: 1003 KiB |
|
After Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 52 KiB |
|
Before Width: | Height: | Size: 2.5 MiB |
@@ -1,14 +1,16 @@
|
||||
# Security CI guide
|
||||
|
||||
This project runs a set of automated security checks on every pull request and
|
||||
on every push to `main`. This page explains what each one does, whether it can
|
||||
This project runs a set of automated security checks on pull requests and
|
||||
selected branch pushes. This page explains what each one does, whether it can
|
||||
block a merge, and the few one-time settings you should turn on to get the full
|
||||
benefit.
|
||||
|
||||
## What runs, and why
|
||||
|
||||
Each check lives in its own file under `.github/workflows/`. They run
|
||||
automatically; you do not start them.
|
||||
Most checks live in files under `.github/workflows/`. CodeQL is configured
|
||||
through GitHub's code scanning default setup, so it appears as a dynamic GitHub
|
||||
workflow instead of a checked-in workflow file. They run automatically; you do
|
||||
not start them.
|
||||
|
||||
| Check | What it protects against | Blocks a merge? |
|
||||
|---|---|---|
|
||||
@@ -88,11 +90,14 @@ let the workflows run on one pull request first, then add them here.
|
||||
2. Turn on **Dependency graph** (usually on by default for public repos) -- this
|
||||
powers Dependency review and Dependabot.
|
||||
3. Turn on **Dependabot alerts** and **Dependabot security updates**.
|
||||
4. Under **Code scanning**, you have two ways to scan the app code with CodeQL:
|
||||
- The included `codeql.yml` workflow already scans `main` and runs weekly.
|
||||
- To also scan **pull requests** (recommended, since most contributions come
|
||||
from forks), click **Set up -> Default** under Code scanning. GitHub then
|
||||
runs CodeQL on pull requests for you, with no token limitations.
|
||||
4. Under **Code scanning**, use **Set up -> Default** for CodeQL. GitHub then
|
||||
runs CodeQL as a dynamic workflow without the fork-token limitations that
|
||||
affect checked-in advanced workflows.
|
||||
|
||||
Do not also add a checked-in CodeQL workflow while default setup is enabled:
|
||||
GitHub rejects advanced CodeQL uploads when default setup is active. If the
|
||||
project later needs an advanced CodeQL workflow, disable default setup first
|
||||
and keep only one CodeQL publishing path active.
|
||||
|
||||
## Keeping it current
|
||||
|
||||
|
||||
@@ -0,0 +1,425 @@
|
||||
# Odysseus Setup Guide
|
||||
|
||||
This page keeps the detailed install, deployment, troubleshooting, and configuration notes out of the front README.
|
||||
|
||||
## Quick Start
|
||||
|
||||
> **Branch note:** `dev` is the default branch and contains the latest development changes, but it may be unstable. For the more stable curated branch, use [`main`](https://github.com/pewdiepie-archdaemon/odysseus/tree/main).
|
||||
|
||||
Defaults work out of the box: clone, run, then configure models/search/email
|
||||
inside **Settings**. Only edit `.env` for deployment-level overrides like
|
||||
`APP_BIND`, `APP_PORT`, `AUTH_ENABLED`, `DATABASE_URL`, or a pre-seeded admin password.
|
||||
|
||||
On first setup, Odysseus creates an admin account (`admin` unless
|
||||
`ODYSSEUS_ADMIN_USER` is set) and prints a temporary password in the terminal.
|
||||
For Docker installs, the same line is in `docker compose logs odysseus`.
|
||||
Use that for the first login, then change it in **Settings**.
|
||||
|
||||
Contributing? See [CONTRIBUTING.md](CONTRIBUTING.md) for setup, testing, and
|
||||
pull request guidelines.
|
||||
|
||||
### Docker (recommended)
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
cp .env.example .env # optional, but recommended for explicit defaults
|
||||
docker compose up -d --build
|
||||
```
|
||||
To include optional extras in the image (PDF viewer, Office extraction; includes AGPL PyMuPDF), build with `docker compose build --build-arg INSTALL_OPTIONAL=true` before `up`.
|
||||
|
||||
Open `http://localhost:7000` when the containers are healthy. Docker Compose
|
||||
binds the web UI to `127.0.0.1` by default. If the port is taken, set
|
||||
`APP_PORT=7001` in `.env` and recreate the container. Set `APP_BIND=0.0.0.0`
|
||||
only when you intentionally want LAN/reverse-proxy access.
|
||||
|
||||
> **On Apple Silicon (M-series) Macs:** Docker can't reach the Metal GPU, so
|
||||
> Cookbook serves local models on CPU only. For GPU-accelerated model serving,
|
||||
> run natively instead — see [Apple Silicon](#apple-silicon) below.
|
||||
|
||||
### Native Linux / macOS
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -r requirements.txt
|
||||
python setup.py
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 7000
|
||||
```
|
||||
Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
|
||||
downloads and serves. The app itself is lightweight; local model serving is the
|
||||
heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
|
||||
connect to API or remote model servers instead. Use `--host 0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
|
||||
|
||||
### Apple Silicon
|
||||
Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
|
||||
M-series Mac, run Odysseus natively:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
./start-macos.sh
|
||||
```
|
||||
|
||||
It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
|
||||
|
||||
```bash
|
||||
ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
|
||||
# then open http://<tailscale-ip>:7860
|
||||
```
|
||||
|
||||
The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
|
||||
set there are picked up automatically without a command-line override each run.
|
||||
|
||||
Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not
|
||||
expose this port directly to the public internet. To build a clickable app wrapper:
|
||||
|
||||
```bash
|
||||
./build-macos-app.sh
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Cookbook, GPU, Ollama, and troubleshooting notes</summary>
|
||||
|
||||
**Docker bundled services.** Compose starts Odysseus, ChromaDB, SearXNG, and
|
||||
ntfy. Odysseus and the bundled service ports bind to `127.0.0.1` by default, so
|
||||
they are reachable from the host but not exposed to your LAN/public internet
|
||||
unless you opt in.
|
||||
|
||||
**Cookbook storage in Docker.** Downloads live in `./data/huggingface`
|
||||
(`~/.cache/huggingface` in the container). Cookbook-installed Python CLIs and
|
||||
serve engines live in `./data/local` (`~/.local` in the container), so they
|
||||
survive container recreation.
|
||||
|
||||
**Remote servers.** In **Cookbook -> Settings -> Servers**, generate the
|
||||
Odysseus SSH key and add the public key to the remote server's
|
||||
`~/.ssh/authorized_keys`. From the host you can also run:
|
||||
|
||||
```bash
|
||||
ssh-copy-id -i data/ssh/id_ed25519.pub user@server
|
||||
```
|
||||
|
||||
**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
|
||||
only detect GPUs that Docker exposes to the container — if the host runtime or
|
||||
device passthrough is not configured, Cookbook sees the iGPU, another card, or
|
||||
CPU instead of your intended GPU.
|
||||
|
||||
For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
|
||||
optionally install the host runtime or update `.env`.
|
||||
|
||||
```bash
|
||||
# Read-only diagnostic (default — installs nothing, never edits .env):
|
||||
scripts/check-docker-gpu.sh
|
||||
|
||||
# Print OS-specific install commands without running them:
|
||||
scripts/check-docker-gpu.sh --print-install-commands
|
||||
|
||||
# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
|
||||
scripts/check-docker-gpu.sh --install-nvidia-toolkit
|
||||
|
||||
# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
|
||||
scripts/check-docker-gpu.sh --enable-nvidia-overlay
|
||||
|
||||
# Full assisted setup — install toolkit, then enable overlay if passthrough works:
|
||||
scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
|
||||
```
|
||||
|
||||
Safety notes:
|
||||
- The app never installs host GPU runtime automatically.
|
||||
- The app never edits `.env` automatically.
|
||||
- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
|
||||
and only after GPU passthrough succeeds. `--yes` skips prompts but does not
|
||||
bypass the passthrough gate.
|
||||
- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
|
||||
Git and the Docker build context.
|
||||
|
||||
To enable manually without the script, add this to `.env`:
|
||||
|
||||
```bash
|
||||
COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
|
||||
```
|
||||
|
||||
**AMD / ROCm.** AMD setup is a read-only diagnostic plus a manual `.env` edit. Run:
|
||||
|
||||
```bash
|
||||
scripts/check-docker-amd-gpu.sh
|
||||
```
|
||||
|
||||
Then add the reported values to `.env`, setting `RENDER_GID` to your host's
|
||||
numeric render group ID:
|
||||
|
||||
```bash
|
||||
COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
|
||||
RENDER_GID=989
|
||||
```
|
||||
|
||||
For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`.
|
||||
|
||||
**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools
|
||||
often accept only a single Compose file and do not reliably honor `COMPOSE_FILE`
|
||||
or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE`
|
||||
overlay workflow above. For stack UIs, point the stack at one of the standalone
|
||||
files instead, which bundle the base stack plus the GPU settings:
|
||||
|
||||
- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit
|
||||
on the host.
|
||||
- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the
|
||||
`video`/`render` group membership, and `RENDER_GID` when needed.
|
||||
|
||||
The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the
|
||||
source of truth; the standalone files mirror them for single-file deployments.
|
||||
|
||||
Verify after enabling either overlay:
|
||||
|
||||
```bash
|
||||
docker compose exec odysseus nvidia-smi -L # NVIDIA
|
||||
docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*' # AMD
|
||||
```
|
||||
|
||||
> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
|
||||
> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
|
||||
> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
|
||||
> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
|
||||
> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
|
||||
> not a Docker passthrough failure. Reinstall the serve engine via
|
||||
> **Cookbook → Dependencies** to get a CUDA-enabled build.
|
||||
>
|
||||
> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
|
||||
> the container confirms device passthrough, not ROCm userspace or a
|
||||
> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
|
||||
> inside the slim Odysseus image.
|
||||
|
||||
**Ollama with Docker.** If Ollama runs on the host, add this endpoint in
|
||||
Settings:
|
||||
|
||||
```text
|
||||
http://host.docker.internal:11434/v1
|
||||
```
|
||||
|
||||
Ollama must listen outside its own loopback interface:
|
||||
|
||||
```bash
|
||||
OLLAMA_HOST=0.0.0.0:11434 ollama serve
|
||||
```
|
||||
|
||||
This connects Odysseus in Docker to an Ollama server that is already running on
|
||||
your host machine; it does not start Ollama inside the container.
|
||||
`host.docker.internal` is Docker's hostname for the host machine from inside the
|
||||
container. Cookbook **Serve** is a separate workflow for serving downloaded
|
||||
models through Odysseus/llama.cpp, so Windows users with an existing Ollama
|
||||
install usually only need to add the endpoint in Settings.
|
||||
|
||||
**Useful checks.**
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
docker compose logs --tail=120 odysseus
|
||||
docker compose logs odysseus | grep -E 'ChromaDB|MemoryVectorStore|DEGRADED'
|
||||
```
|
||||
|
||||
**macOS details.** `start-macos.sh` installs Homebrew deps, creates the venv,
|
||||
runs setup, and starts uvicorn on port `7860` because AirPlay often holds
|
||||
`7000`. It uses llama.cpp/Ollama for Metal. vLLM/SGLang are CUDA/ROCm-only and
|
||||
do not run on macOS. MLX-only models are not served by Odysseus.
|
||||
|
||||
</details>
|
||||
|
||||
### Native Windows
|
||||
|
||||
**One-command launcher** (creates the venv, installs deps, runs setup, starts the
|
||||
server; safe to re-run):
|
||||
|
||||
```powershell
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
|
||||
```
|
||||
|
||||
Or do it by hand:
|
||||
|
||||
```powershell
|
||||
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
|
||||
cd odysseus
|
||||
py -3.11 -m venv venv
|
||||
venv\Scripts\Activate.ps1
|
||||
pip install -r requirements.txt
|
||||
python setup.py
|
||||
python -m uvicorn app:app --host 127.0.0.1 --port 7000
|
||||
```
|
||||
|
||||
If Python 3.11 is not installed, use `py -3.12` (or another installed
|
||||
3.11+ version) for the venv step.
|
||||
|
||||
**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
|
||||
email, calendar, deep research) runs fully native. For full **Cookbook** background
|
||||
model downloads and the agent shell tool, also install
|
||||
[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`).
|
||||
Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows,
|
||||
[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at
|
||||
`http://localhost:11434/v1` in Settings.
|
||||
|
||||
Open `http://localhost:7000`, log in with the generated admin password,
|
||||
and configure everything else inside **Settings**.
|
||||
|
||||
## Troubleshooting & Advanced Setup
|
||||
|
||||
### `chromadb-client` conflicts with embedded ChromaDB
|
||||
If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
|
||||
|
||||
**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
|
||||
```bash
|
||||
./venv/bin/pip uninstall chromadb-client -y
|
||||
./venv/bin/pip install --force-reinstall chromadb
|
||||
```
|
||||
|
||||
### HTTPS + LAN/Tailscale exposure
|
||||
To expose Odysseus on a local network or Tailscale with HTTPS:
|
||||
1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
|
||||
2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert):
|
||||
```bash
|
||||
mkcert -install
|
||||
mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
|
||||
```
|
||||
3. Run `uvicorn` with the generated certs:
|
||||
```bash
|
||||
python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
|
||||
```
|
||||
4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
|
||||
|
||||
### Optional Dependencies
|
||||
`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
|
||||
|
||||
| Package | Feature unlocked |
|
||||
|---------|-----------------|
|
||||
| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
|
||||
| `ddgs` | DuckDuckGo as a search provider option. |
|
||||
| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
|
||||
| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
|
||||
|
||||
### Faster, reproducible installs with uv (optional)
|
||||
[uv](https://docs.astral.sh/uv/) works as a drop-in replacement for the
|
||||
venv + pip steps in the native install guides. No project changes are needed, and you get faster installs plus an optional lockfile for reproducible environments. After [installing `uv`](https://docs.astral.sh/uv/getting-started/installation/), use:
|
||||
|
||||
```bash
|
||||
uv venv venv --python 3.13
|
||||
uv pip install -r requirements.txt
|
||||
# then continue as usual: python setup.py, uvicorn, ...
|
||||
```
|
||||
|
||||
`requirements.txt` is intentionally unpinned, so two installs at different times can produce different package versions. If you want a reproducible environment (e.g. across your own machines, or to roll back after a bad upgrade), snapshot and restore exact versions with:
|
||||
|
||||
```bash
|
||||
uv pip compile requirements.txt -o requirements.lock # snapshot current resolution
|
||||
uv pip sync requirements.lock # reproduce it exactly later
|
||||
```
|
||||
|
||||
`requirements.lock` is gitignored and platform-specific (compile it on the OS you deploy to). Regenerate it deliberately when you want to take upgrades. The plain `uv pip install -r requirements.txt` keeps following the unpinned requirements like pip does.
|
||||
|
||||
### Outlook / Office 365 email
|
||||
Odysseus email accounts currently use IMAP/SMTP username-password auth. Outlook
|
||||
and Microsoft 365 generally require OAuth instead, so normal Microsoft mailbox
|
||||
passwords will fail. See [docs/email-outlook.md](docs/email-outlook.md) for the
|
||||
current limitation and the planned integration direction.
|
||||
|
||||
## Security Notes
|
||||
Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.
|
||||
|
||||
- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
|
||||
- Keep `LOCALHOST_BYPASS=false` outside local development.
|
||||
- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
|
||||
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
|
||||
- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
|
||||
- Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
|
||||
- Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
|
||||
- Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
|
||||
- If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
|
||||
- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
|
||||
- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
|
||||
- Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.
|
||||
|
||||
### Private or proxied deployments
|
||||
Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:
|
||||
|
||||
1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
|
||||
2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
|
||||
3. Put the authenticated Odysseus web/API entrypoint behind that layer.
|
||||
4. Keep raw service and model ports internal-only.
|
||||
|
||||
Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
|
||||
`ALLOWED_ORIGINS` lists exact permitted origins for cross-origin browser/API clients; ordinary same-origin reverse-proxy access usually does not need a special CORS entry.
|
||||
|
||||
Common internal-only ports from the default docs/compose setup:
|
||||
|
||||
| Port | Service |
|
||||
|---|---|
|
||||
| `7000` | Odysseus raw app port |
|
||||
| `8080` | SearXNG |
|
||||
| `8091` | ntfy |
|
||||
| `8100` | ChromaDB host port for manual/compose access |
|
||||
| `11434` | Ollama |
|
||||
| `8000-8020` | Common local model/provider APIs |
|
||||
|
||||
## Configuration
|
||||
Most setup is done inside the app with `/setup` or **Settings**. Use `.env`
|
||||
for deployment-level defaults and secrets you want present before first boot.
|
||||
Key settings:
|
||||
|
||||
| Variable | Default | Description |
|
||||
|---|---|---|
|
||||
| `LLM_HOST` | `localhost` | Your LLM server (e.g. `llm-host.local:8000`) |
|
||||
| `LLM_HOSTS` | -- | Comma-separated list for model discovery |
|
||||
| `OPENAI_API_KEY` | -- | Optional OpenAI key. Prefer adding providers in the app unless pre-seeding. |
|
||||
| `SEARXNG_INSTANCE` | `http://localhost:8080` | SearXNG URL. Docker overrides this to `http://searxng:8080`. |
|
||||
| `SEARXNG_SECRET` | generated on first Docker boot | Optional SearXNG cookie/CSRF secret. Leave blank unless you need to pin it. |
|
||||
| `APP_BIND` | `127.0.0.1` | Docker Compose host bind address for the web UI. Use `0.0.0.0` only for intentional LAN/reverse-proxy access. |
|
||||
| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
|
||||
| `APP_DATA_DIR` | `./data` | Docker Compose host directory for application data volumes. |
|
||||
| `APP_LOGS_DIR` | `./logs` | Docker Compose host directory for application logs. |
|
||||
| `AUTH_ENABLED` | `true` | Enable/disable login |
|
||||
| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
|
||||
| `ALLOWED_ORIGINS` | `http://localhost,http://127.0.0.1` | Comma-separated exact permitted origins for cross-origin browser/API clients. |
|
||||
| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
|
||||
| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
|
||||
| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
|
||||
| `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |
|
||||
| `EMBEDDING_URL` | -- | OpenAI-compatible embeddings endpoint |
|
||||
| `ODYSSEUS_CHAT_UPLOAD_MAX_BYTES` | `10485760` | Chat/agent attachment cap in bytes. Raise for larger local PDFs or text documents. |
|
||||
| `ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES` | `104857600` | Gallery image upload cap in bytes (100 MB). |
|
||||
| `ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES` | `26214400` | Gallery transform input cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_MEMORY_IMPORT_MAX_BYTES` | `10485760` | Memory import file cap in bytes (10 MB). |
|
||||
| `ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES` | `26214400` | Personal document upload cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES` | `26214400` | Email compose attachment cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_STT_MAX_AUDIO_BYTES` | `26214400` | Speech-to-text audio cap in bytes (25 MB). |
|
||||
| `ODYSSEUS_ICS_MAX_BYTES` | `10485760` | Calendar `.ics` import cap in bytes (10 MB). |
|
||||
|
||||
All upload-limit variables are optional. When set, each must be a positive integer; an invalid value fails fast at startup.
|
||||
|
||||
### Built-in MCP servers (optional setup)
|
||||
|
||||
Odysseus auto-registers a few built-in MCP servers at startup. The npx-based ones (currently the browser server, `@playwright/mcp`) only start when their npm package is already in the local npx cache. If a package isn't cached, that server is skipped with a startup log message explaining what to do, so a fresh install does not block on a multi-minute npm download or hang if Playwright system deps are missing.
|
||||
|
||||
To enable the browser MCP (page navigation, screenshots, vision), run once:
|
||||
|
||||
```bash
|
||||
npx -y @playwright/mcp@latest --version
|
||||
```
|
||||
|
||||
That installs `@playwright/mcp` plus Playwright (~300MB total). Restart Odysseus and the server will register at startup.
|
||||
|
||||
## Architecture
|
||||
```
|
||||
app.py # FastAPI entry point
|
||||
core/ auth, database, middleware, constants
|
||||
src/ llm_core, agent_loop, agent_tools, chat_processor, search/
|
||||
routes/ chat, session, document, memory, model … endpoints
|
||||
services/ docs, memory, search, hwfit (Cookbook) …
|
||||
static/ index.html + app.js + style.css + js/ (modular front-end)
|
||||
docs/ landing page (index.html) + preview clips
|
||||
```
|
||||
|
||||
## Data
|
||||
All user data lives in `data/` (gitignored): `app.db` (sessions, messages, documents),
|
||||
`memory.json`, `presets.json`, `uploads/`, `personal_docs/`, `chroma/`, `settings.json`.
|
||||
|
||||
To back up or restore everything in `data/`, see the
|
||||
[Backup & Restore guide](docs/backup-restore.md).
|
||||
@@ -102,6 +102,7 @@ python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py POST /api/codex/memory
|
||||
|
||||
## Email draft + send
|
||||
|
||||
- Prefer `POST /api/codex/emails/draft-document` for agent-written email replies. It creates an editable Odysseus Document with `language: "email"` and does not touch IMAP/send.
|
||||
- `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
|
||||
- `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.
|
||||
|
||||
|
||||
@@ -17,6 +17,11 @@ def _usage() -> int:
|
||||
print(" odysseus_api.py todos add TITLE", file=sys.stderr)
|
||||
print(" odysseus_api.py emails list [limit]", file=sys.stderr)
|
||||
print(" odysseus_api.py emails read UID", file=sys.stderr)
|
||||
print(" odysseus_api.py emails draft-doc JSON_PAYLOAD", file=sys.stderr)
|
||||
print(" odysseus_api.py documents list [limit]", file=sys.stderr)
|
||||
print(" odysseus_api.py documents read DOC_ID", file=sys.stderr)
|
||||
print(" odysseus_api.py documents create JSON_PAYLOAD", file=sys.stderr)
|
||||
print(" odysseus_api.py documents delete DOC_ID", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook tasks", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook servers", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
|
||||
@@ -79,6 +84,33 @@ def main() -> int:
|
||||
method = "GET"
|
||||
path = f"/api/codex/emails/{sys.argv[3]}"
|
||||
body = None
|
||||
elif action in ("draft-doc", "draft_document") and len(sys.argv) >= 4:
|
||||
method = "POST"
|
||||
path = "/api/codex/emails/draft-document"
|
||||
body = " ".join(sys.argv[3:])
|
||||
else:
|
||||
return _usage()
|
||||
elif command in ("documents", "docs"):
|
||||
if len(sys.argv) < 3:
|
||||
return _usage()
|
||||
action = sys.argv[2].lower()
|
||||
if action == "list":
|
||||
method = "GET"
|
||||
limit = sys.argv[3] if len(sys.argv) >= 4 else "50"
|
||||
path = f"/api/codex/documents?limit={limit}"
|
||||
body = None
|
||||
elif action == "read" and len(sys.argv) >= 4:
|
||||
method = "GET"
|
||||
path = f"/api/codex/documents/{sys.argv[3]}"
|
||||
body = None
|
||||
elif action == "create" and len(sys.argv) >= 4:
|
||||
method = "POST"
|
||||
path = "/api/codex/documents"
|
||||
body = " ".join(sys.argv[3:])
|
||||
elif action == "delete" and len(sys.argv) >= 4:
|
||||
method = "DELETE"
|
||||
path = f"/api/codex/documents/{sys.argv[3]}"
|
||||
body = None
|
||||
else:
|
||||
return _usage()
|
||||
elif command == "cookbook":
|
||||
|
||||
@@ -17,6 +17,11 @@ def _usage() -> int:
|
||||
print(" odysseus_api.py todos add TITLE", file=sys.stderr)
|
||||
print(" odysseus_api.py emails list [limit]", file=sys.stderr)
|
||||
print(" odysseus_api.py emails read UID", file=sys.stderr)
|
||||
print(" odysseus_api.py emails draft-doc JSON_PAYLOAD", file=sys.stderr)
|
||||
print(" odysseus_api.py documents list [limit]", file=sys.stderr)
|
||||
print(" odysseus_api.py documents read DOC_ID", file=sys.stderr)
|
||||
print(" odysseus_api.py documents create JSON_PAYLOAD", file=sys.stderr)
|
||||
print(" odysseus_api.py documents delete DOC_ID", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook tasks", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook servers", file=sys.stderr)
|
||||
print(" odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
|
||||
@@ -79,6 +84,33 @@ def main() -> int:
|
||||
method = "GET"
|
||||
path = f"/api/codex/emails/{sys.argv[3]}"
|
||||
body = None
|
||||
elif action in ("draft-doc", "draft_document") and len(sys.argv) >= 4:
|
||||
method = "POST"
|
||||
path = "/api/codex/emails/draft-document"
|
||||
body = " ".join(sys.argv[3:])
|
||||
else:
|
||||
return _usage()
|
||||
elif command in ("documents", "docs"):
|
||||
if len(sys.argv) < 3:
|
||||
return _usage()
|
||||
action = sys.argv[2].lower()
|
||||
if action == "list":
|
||||
method = "GET"
|
||||
limit = sys.argv[3] if len(sys.argv) >= 4 else "50"
|
||||
path = f"/api/codex/documents?limit={limit}"
|
||||
body = None
|
||||
elif action == "read" and len(sys.argv) >= 4:
|
||||
method = "GET"
|
||||
path = f"/api/codex/documents/{sys.argv[3]}"
|
||||
body = None
|
||||
elif action == "create" and len(sys.argv) >= 4:
|
||||
method = "POST"
|
||||
path = "/api/codex/documents"
|
||||
body = " ".join(sys.argv[3:])
|
||||
elif action == "delete" and len(sys.argv) >= 4:
|
||||
method = "DELETE"
|
||||
path = f"/api/codex/documents/{sys.argv[3]}"
|
||||
body = None
|
||||
else:
|
||||
return _usage()
|
||||
elif command == "cookbook":
|
||||
|
||||
@@ -102,6 +102,7 @@ python3 integrations/codex/scripts/odysseus_api.py POST /api/codex/memory '{"tex
|
||||
|
||||
## Email draft + send
|
||||
|
||||
- Prefer `POST /api/codex/emails/draft-document` for Codex-written email replies. It creates an editable Odysseus Document with `language: "email"` and does not touch IMAP/send.
|
||||
- `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
|
||||
- `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.
|
||||
|
||||
|
||||
@@ -141,7 +141,20 @@ if (-not (Find-GitBash)) {
|
||||
Write-Host " https://git-scm.com/download/win" -ForegroundColor Yellow
|
||||
}
|
||||
|
||||
# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
|
||||
# 6. Point CUDA_PATH at a real CUDA toolkit so GPU llama-cpp-python can import.
|
||||
$cudaBase = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA"
|
||||
if (Test-Path $cudaBase) {
|
||||
$cudaBest = Get-ChildItem $cudaBase -Directory -ErrorAction SilentlyContinue |
|
||||
Where-Object { Test-Path (Join-Path $_.FullName "bin") } |
|
||||
Sort-Object { try { [version]($_.Name -replace "^v", "") } catch { [version]"0.0" } } -Descending |
|
||||
Select-Object -First 1
|
||||
if ($cudaBest) {
|
||||
$env:CUDA_PATH = $cudaBest.FullName
|
||||
Write-Host ("Using CUDA_PATH = " + $cudaBest.FullName) -ForegroundColor Cyan
|
||||
}
|
||||
}
|
||||
|
||||
# 7. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
|
||||
Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port)
|
||||
Write-Host "Press Ctrl+C to stop."
|
||||
Write-Host ""
|
||||
|
||||
@@ -885,8 +885,109 @@ def _smtp_connect(account=None, cfg=None):
|
||||
return conn
|
||||
|
||||
|
||||
def _read_agent_email_confirm_setting() -> bool:
|
||||
"""True if the user wants agent send_email/reply_to_email calls to be
|
||||
queued for manual approval instead of SMTPed immediately. Defaults to
|
||||
True so a fresh install is safe — agents have been observed inventing
|
||||
signatures and sending to real recipients without the user's review."""
|
||||
try:
|
||||
from src.settings import get_setting
|
||||
return bool(get_setting("agent_email_confirm", True))
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
|
||||
def _stash_agent_draft(*, to, subject, body, in_reply_to=None, references=None,
|
||||
cc=None, bcc=None, account=None) -> dict:
|
||||
"""Insert the composed email into scheduled_emails with status
|
||||
'agent_draft' and a far-future send_at so the scheduled-send poller
|
||||
never picks it up. Returns the pending payload the model surfaces to
|
||||
the user (and that the chat UI can render as an approval card)."""
|
||||
try:
|
||||
from src.constants import SCHEDULED_EMAILS_DB
|
||||
except Exception:
|
||||
return {"success": False, "error": "Pending-email storage unavailable"}
|
||||
pending_id = uuid.uuid4().hex[:16]
|
||||
far_future = "9999-12-31T00:00:00"
|
||||
now = datetime.utcnow().isoformat()
|
||||
try:
|
||||
conn = sqlite3.connect(SCHEDULED_EMAILS_DB)
|
||||
# Touch the schema in case the email-routes init hasn't run yet
|
||||
# (MCP server can boot independently).
|
||||
conn.execute("""
|
||||
CREATE TABLE IF NOT EXISTS scheduled_emails (
|
||||
id TEXT PRIMARY KEY,
|
||||
to_addr TEXT NOT NULL,
|
||||
cc TEXT,
|
||||
bcc TEXT,
|
||||
subject TEXT,
|
||||
body TEXT NOT NULL,
|
||||
in_reply_to TEXT,
|
||||
references_hdr TEXT,
|
||||
attachments TEXT,
|
||||
send_at TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'pending',
|
||||
error TEXT,
|
||||
owner TEXT DEFAULT '',
|
||||
account_id TEXT,
|
||||
odysseus_kind TEXT
|
||||
)
|
||||
""")
|
||||
conn.execute("""
|
||||
INSERT INTO scheduled_emails
|
||||
(id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr,
|
||||
attachments, send_at, created_at, status, account_id, odysseus_kind, owner)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'agent_draft', ?, ?, ?)
|
||||
""", (
|
||||
pending_id,
|
||||
to if isinstance(to, str) else ", ".join(to),
|
||||
cc if isinstance(cc, str) else (", ".join(cc) if cc else None),
|
||||
bcc if isinstance(bcc, str) else (", ".join(bcc) if bcc else None),
|
||||
subject or "",
|
||||
body or "",
|
||||
in_reply_to or None,
|
||||
references if isinstance(references, str) else (" ".join(references) if references else None),
|
||||
"[]",
|
||||
far_future,
|
||||
now,
|
||||
account or None,
|
||||
"agent_draft",
|
||||
"",
|
||||
))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
return {"success": False, "error": f"Failed to stash draft: {e}"}
|
||||
return {
|
||||
"success": True,
|
||||
"pending": True,
|
||||
"pending_id": pending_id,
|
||||
"to": to if isinstance(to, str) else ", ".join(to),
|
||||
"subject": subject or "",
|
||||
"body": body or "",
|
||||
"message": (
|
||||
"✋ Draft staged for your approval — nothing has been sent yet.\n"
|
||||
"Review the To/Subject/Body above. Reply 'send' to deliver, or "
|
||||
"'cancel' to discard."
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def _send_email(to, subject, body, in_reply_to=None, references=None, cc=None, bcc=None, account=None):
|
||||
"""Send an email via SMTP. Returns dict with status."""
|
||||
"""Send an email via SMTP. Returns dict with status.
|
||||
|
||||
When the `agent_email_confirm` setting is on (the default), the email
|
||||
is NOT SMTPed — instead it lands in scheduled_emails as an
|
||||
`agent_draft` row and the user reviews + approves it from the chat
|
||||
UI. This closes the auto-send hole that let earlier models invent
|
||||
signatures and ship them to real recipients without confirmation."""
|
||||
if _read_agent_email_confirm_setting():
|
||||
return _stash_agent_draft(
|
||||
to=to, subject=subject, body=body,
|
||||
in_reply_to=in_reply_to, references=references,
|
||||
cc=cc, bcc=bcc, account=account,
|
||||
)
|
||||
send_account, cfg = _resolve_send_config(account)
|
||||
msg = EmailMessage()
|
||||
msg["From"] = _clean_header_value(cfg["from_address"])
|
||||
|
||||
@@ -93,16 +93,15 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
|
||||
if category_filter:
|
||||
msg += f" in category '{category_filter}'"
|
||||
return [TextContent(type="text", text=msg + ".")]
|
||||
|
||||
lines = [f"Found {len(memories)} memory entries:\n"]
|
||||
for m in memories[:100]:
|
||||
for m in memories:
|
||||
cat = m.get("category", "fact")
|
||||
mid = m.get("id", "?")[:8]
|
||||
text = m.get("text", "")
|
||||
if len(text) > 150:
|
||||
text = text[:150] + "..."
|
||||
lines.append(f"- [{cat}] `{mid}` — {text}")
|
||||
if len(memories) > 100:
|
||||
lines.append(f"... and {len(memories) - 100} more")
|
||||
return [TextContent(type="text", text="\n".join(lines))]
|
||||
|
||||
elif action == "add":
|
||||
|
||||
@@ -5,16 +5,16 @@
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.98.0"
|
||||
"@anthropic-ai/sdk": "^0.104.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@antithesishq/bombadil": "^0.3.2"
|
||||
"@antithesishq/bombadil": "^0.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.98.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.98.0.tgz",
|
||||
"integrity": "sha512-N7aXtCvC5g6T1Y4V29lJjceu/zTkVkIZF0jdBvagr0TRFHuKeImffalGWEfqZKrvjH+IQbzJWw6TmSmUzrlMgg==",
|
||||
"version": "0.104.1",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.104.1.tgz",
|
||||
"integrity": "sha512-gGACa/+IaiXzRRmF96aOhamoBgapKRBiFWbmmTFP8aMkpaEcuStF+Q61bjo4vPxBM7gqWJNZqsngslRdnLHv0Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"json-schema-to-ts": "^3.1.1",
|
||||
@@ -33,11 +33,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@antithesishq/bombadil": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.3.2.tgz",
|
||||
"integrity": "sha512-ATy1w9ZY5gbny1H8DFc7rxZitT7DLLLFDiGcRZe+8TQiUrV5tLO+IJGOVNNLp3RpCqjZqSsxGiKoQsx31ipV1g==",
|
||||
"version": "0.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@antithesishq/bombadil/-/bombadil-0.5.0.tgz",
|
||||
"integrity": "sha512-s0zImmr0iyvSP6QcVLvf40CUiZYIdWBAxiq20uhzujwvfitYa3PGJN652k/pLtVccHM/JrGQxZdvLnihZpltHA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"bombadil": "bin/bombadil.js"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
"version": "7.29.7",
|
||||
|
||||
@@ -4,9 +4,9 @@
|
||||
"url": "https://github.com/pewdiepie-archdaemon/odysseus.git"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@antithesishq/bombadil": "^0.3.2"
|
||||
"@antithesishq/bombadil": "^0.5.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.98.0"
|
||||
"@anthropic-ai/sdk": "^0.104.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,4 +33,4 @@ PyMuPDF
|
||||
# magika (onnxruntime), already a core dep via fastembed. We avoid the
|
||||
# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
|
||||
# the dependency-age discussion in issue #485.
|
||||
markitdown[docx,pptx,xlsx,xls]==0.1.5
|
||||
markitdown[docx,pptx,xlsx,xls]==0.1.6
|
||||
|
||||
@@ -3,8 +3,8 @@ uvicorn
|
||||
python-multipart
|
||||
python-dotenv
|
||||
httpx
|
||||
pydantic>=2.0
|
||||
pydantic-settings>=2.0
|
||||
pydantic>=2.13.4
|
||||
pydantic-settings>=2.14.1
|
||||
SQLAlchemy
|
||||
pypdf
|
||||
beautifulsoup4
|
||||
|
||||
@@ -31,6 +31,7 @@ ALLOWED_SCOPES = {
|
||||
TOKEN_PROFILES = {
|
||||
"chat": ["chat"],
|
||||
"codex_todos": ["todos:read", "todos:write"],
|
||||
"codex_documents": ["documents:read", "documents:write"],
|
||||
"codex_email_drafts": ["email:read", "email:draft", "documents:read", "documents:write"],
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ import re
|
||||
from pathlib import Path
|
||||
|
||||
from core.atomic_io import atomic_write_json, atomic_write_text
|
||||
from core.auth import AuthManager
|
||||
from core.auth import AuthManager, SetAdminResult
|
||||
from src.constants import DEEP_RESEARCH_DIR, MEMORY_FILE, SKILLS_DIR
|
||||
from src.rate_limiter import RateLimiter
|
||||
from src.settings_scrub import scrub_settings
|
||||
@@ -73,6 +73,11 @@ class DeleteUserRequest(BaseModel):
|
||||
class RenameUserRequest(BaseModel):
|
||||
username: str
|
||||
|
||||
|
||||
class SetAdminRequest(BaseModel):
|
||||
is_admin: bool
|
||||
|
||||
|
||||
class SetOpenRegistrationRequest(BaseModel):
|
||||
enabled: bool
|
||||
|
||||
@@ -487,6 +492,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
|
||||
invalidator()
|
||||
return {"ok": True, "username": new_username, "renamed_self": old_username == user}
|
||||
|
||||
@router.put("/users/{username}/admin")
async def set_user_admin(username: str, body: SetAdminRequest, request: Request):
    """Promote/demote a user to/from admin. Admin only.

    The last remaining admin can't be demoted (no lockout). Self-demotion
    is allowed while another admin exists; the `self` flag tells the UI to
    reload the acting user into the normal-user view.
    """
    acting_user = _get_current_user(request)
    if not (acting_user and auth_manager.is_admin(acting_user)):
        raise HTTPException(403, "Admin only")

    outcome = auth_manager.set_admin(username, body.is_admin, acting_user)

    # Each failure sentinel maps to one HTTP error; checked in this order.
    failures = (
        (SetAdminResult.USER_NOT_FOUND, 404, "User not found"),
        (SetAdminResult.NOT_AUTHORIZED, 403, "Admin only"),
        (SetAdminResult.LAST_ADMIN, 400, "Cannot demote the last admin"),
    )
    for sentinel, status_code, detail in failures:
        if outcome is sentinel:
            raise HTTPException(status_code, detail)

    # Compare normalised names so case/whitespace differences still count
    # as the acting user changing their own role.
    target_name = (username or "").strip().lower()
    acting_name = (acting_user or "").strip().lower()
    return {
        "ok": True,
        "is_admin": body.is_admin,
        "self": target_name == acting_name,
    }
|
||||
|
||||
@router.post("/signup-toggle", deprecated=True)
|
||||
async def toggle_signup(request: Request):
|
||||
"""
|
||||
|
||||
@@ -11,7 +11,7 @@ from pydantic import BaseModel
|
||||
from sqlalchemy import or_, and_
|
||||
from dateutil.rrule import rrulestr
|
||||
|
||||
from core.database import SessionLocal, CalendarCal, CalendarEvent
|
||||
from core.database import SessionLocal, CalendarCal, CalendarDeletedEvent, CalendarEvent
|
||||
from src.auth_helpers import require_user
|
||||
from src.upload_limits import read_upload_limited, ICS_MAX_BYTES
|
||||
|
||||
@@ -126,6 +126,54 @@ def _resolve_base_uid(uid: str) -> str:
|
||||
raise ValueError("malformed compound UID: missing base before ::")
|
||||
return base
|
||||
|
||||
|
||||
async def _push_caldav_event_after_commit(owner: str, uid: str, action: str):
    """Mirror an already-committed local change to CalDAV, best effort.

    ``action`` selects ``push_event_create``, ``push_event_update`` or
    ``push_event_delete`` from ``src.caldav_sync``; any other value pushes
    nothing. A truthy result that is neither ``ok`` nor ``skipped`` counts
    as a failure. Failures are logged and never raised, so the local write
    stays authoritative. For ``create``/``update`` failures the event's
    ``caldav_sync_pending`` is set to the action so a later sync can retry.
    """
    try:
        outcome = {"ok": True}
        if action == "create":
            from src.caldav_sync import push_event_create
            outcome = await push_event_create(owner, uid)
        elif action == "update":
            from src.caldav_sync import push_event_update
            outcome = await push_event_update(owner, uid)
        elif action == "delete":
            from src.caldav_sync import push_event_delete
            outcome = await push_event_delete(owner, uid)
        rejected = outcome and not (outcome.get("ok") or outcome.get("skipped"))
        if rejected:
            # Routed through the handler below so all failures share one path.
            raise RuntimeError(outcome.get("error") or outcome)
        return
    except Exception as exc:
        logger.warning("CalDAV %s push failed for uid=%s: %s", action, uid, exc)

    # Only reached after a failed push.
    if action not in {"create", "update"}:
        return
    session = SessionLocal()
    try:
        event = _get_or_404_event(session, uid, owner)
        event.caldav_sync_pending = action
        session.commit()
    except Exception:
        session.rollback()
    finally:
        session.close()
|
||||
|
||||
|
||||
def _record_caldav_delete_tombstone(db, ev: CalendarEvent, owner: str) -> None:
    """Create or refresh a ``CalendarDeletedEvent`` row for a CalDAV event.

    Does nothing unless the event's calendar exists and has source
    ``"caldav"``. Otherwise the tombstone keyed by ``(uid, owner)`` is
    looked up, added to the session if missing, and updated with the
    event's calendar id, remote href/etag, the calendar's
    ``caldav_base_url`` (``None`` when absent) and summary; ``last_error``
    is cleared. The session is not committed here.
    """
    calendar = ev.calendar
    if not calendar or calendar.source != "caldav":
        return

    marker = (
        db.query(CalendarDeletedEvent)
        .filter(
            CalendarDeletedEvent.uid == ev.uid,
            CalendarDeletedEvent.owner == owner,
        )
        .first()
    )
    if not marker:
        marker = CalendarDeletedEvent(uid=ev.uid, owner=owner)
        db.add(marker)

    snapshot = {
        "calendar_id": ev.calendar_id,
        "remote_href": ev.remote_href,
        "remote_etag": ev.remote_etag,
        "caldav_base_url": getattr(ev.calendar, "caldav_base_url", None),
        "summary": ev.summary or "",
        "last_error": None,
    }
    for field, value in snapshot.items():
        setattr(marker, field, value)
|
||||
|
||||
# ── Pydantic models ──
|
||||
|
||||
class EventCreate(BaseModel):
|
||||
@@ -843,13 +891,13 @@ def setup_calendar_routes() -> APIRouter:
|
||||
return {"ok": False, "error": str(e)[:200]}
|
||||
|
||||
@router.post("/sync")
|
||||
async def sync_caldav_endpoint(request: Request):
|
||||
"""Pull events from the configured CalDAV server into local DB.
|
||||
async def sync_caldav_endpoint(request: Request, direction: str = "pull"):
|
||||
"""Sync events with the configured CalDAV server.
|
||||
Returns counts + any per-calendar errors. Called by the frontend
|
||||
on calendar open and by the periodic scheduler loop."""
|
||||
owner = _require_user(request)
|
||||
from src.caldav_sync import sync_caldav
|
||||
return await sync_caldav(owner)
|
||||
from src.caldav_sync import sync_caldav_direction
|
||||
return await sync_caldav_direction(owner, direction)
|
||||
|
||||
|
||||
@router.delete("/calendars/{cal_id}")
|
||||
@@ -1002,19 +1050,12 @@ def setup_calendar_routes() -> APIRouter:
|
||||
is_utc=_is_utc and not data.all_day,
|
||||
rrule=data.rrule or "",
|
||||
color=data.color or None,
|
||||
caldav_sync_pending="create" if cal.source == "caldav" else None,
|
||||
)
|
||||
db.add(ev)
|
||||
db.commit()
|
||||
if cal.source == "caldav":
|
||||
# Push the new event to the remote so it appears on the user's
|
||||
# other devices — the sync is otherwise pull-only (#800).
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, cal.source, cal.id, {
|
||||
"uid": uid, "summary": data.summary, "description": data.description,
|
||||
"location": data.location, "dtstart": dtstart, "dtend": dtend,
|
||||
"all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
|
||||
"rrule": data.rrule or "",
|
||||
})
|
||||
await _push_caldav_event_after_commit(owner, uid, "create")
|
||||
return {"ok": True, "uid": uid}
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -1060,15 +1101,12 @@ def setup_calendar_routes() -> APIRouter:
|
||||
ev.rrule = data.rrule
|
||||
if data.color is not None:
|
||||
ev.color = data.color if data.color else None
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav"
|
||||
if is_caldav:
|
||||
ev.caldav_sync_pending = "update"
|
||||
db.commit()
|
||||
cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
|
||||
if cal and cal.source == "caldav":
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, cal.source, cal.id, {
|
||||
"uid": ev.uid, "summary": ev.summary, "description": ev.description,
|
||||
"location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
|
||||
"all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
|
||||
})
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "update")
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
@@ -1089,15 +1127,13 @@ def setup_calendar_routes() -> APIRouter:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ev = _get_or_404_event(db, base_uid, owner)
|
||||
# Capture what the remote push needs BEFORE the row is gone.
|
||||
_cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
|
||||
_is_caldav = bool(_cal and _cal.source == "caldav")
|
||||
_cal_id, _ev_uid = ev.calendar_id, ev.uid
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav"
|
||||
if is_caldav:
|
||||
_record_caldav_delete_tombstone(db, ev, owner)
|
||||
db.delete(ev)
|
||||
db.commit()
|
||||
if _is_caldav:
|
||||
from src.caldav_writeback import writeback_event
|
||||
await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "delete")
|
||||
return {"ok": True}
|
||||
except HTTPException:
|
||||
raise
|
||||
|
||||
@@ -159,9 +159,17 @@ async def auto_name_session(session_manager, sess):
|
||||
return
|
||||
|
||||
owner = getattr(sess, "owner", None)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=owner,
|
||||
)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(owner=owner)
|
||||
if not t_model:
|
||||
# If no task/utility model is configured at all, fall back to
|
||||
# the session's own model so auto-naming still works even on
|
||||
# minimal setups.
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
_fallback = resolve_endpoint("default", owner=owner)
|
||||
if _fallback and _fallback[1]:
|
||||
t_url, t_model, t_headers = _fallback
|
||||
else:
|
||||
t_url, t_model, t_headers = sess.endpoint_url, sess.model, sess.headers
|
||||
if not t_model:
|
||||
logger.debug("[auto-name] No model provided, skipping")
|
||||
return
|
||||
@@ -497,6 +505,29 @@ def _normalize_model_id_from_cache(sess) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _session_is_research_spinoff(sess) -> bool:
|
||||
"""True if this session was created via research "Discuss" spin-off.
|
||||
|
||||
Detected by the primer system message the spin-off endpoint seeds into
|
||||
history (metadata ``research_spinoff_from``). Such sessions are grounded
|
||||
on the seeded report, so global memory + personal-doc RAG injection is
|
||||
suppressed for them (the report is the sole knowledge base). Handles both
|
||||
ChatMessage objects and plain dicts.
|
||||
"""
|
||||
for m in getattr(sess, "history", []) or []:
|
||||
role = getattr(m, "role", None)
|
||||
if role is None and isinstance(m, dict):
|
||||
role = m.get("role")
|
||||
if role != "system":
|
||||
continue
|
||||
md = getattr(m, "metadata", None)
|
||||
if md is None and isinstance(m, dict):
|
||||
md = m.get("metadata")
|
||||
if (md or {}).get("research_spinoff_from"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
async def build_chat_context(
|
||||
sess,
|
||||
request,
|
||||
@@ -562,9 +593,17 @@ async def build_chat_context(
|
||||
mem_enabled, user, incognito, no_memory, uprefs.get("memory_enabled", "NOT_SET"),
|
||||
)
|
||||
|
||||
# Research-spinoff ("Discuss") sessions are grounded on the seeded report:
|
||||
# the primer system message IS the knowledge base. Injecting global memory
|
||||
# or personal-doc RAG on every turn pulls in keyword-matched but off-topic
|
||||
# facts ("wrong data") and competes with the report, so suppress both here.
|
||||
is_research_spinoff = _session_is_research_spinoff(sess)
|
||||
if is_research_spinoff:
|
||||
mem_enabled = False
|
||||
|
||||
# Use RAG?
|
||||
use_rag_val = (str(use_rag).lower() != "false") if use_rag is not None else True
|
||||
if incognito or not allow_tool_preprocessing:
|
||||
if incognito or not allow_tool_preprocessing or is_research_spinoff:
|
||||
use_rag_val = False
|
||||
|
||||
# If pre-fetched search context was provided (compare mode), skip live web search
|
||||
@@ -587,7 +626,7 @@ async def build_chat_context(
|
||||
incognito=incognito,
|
||||
use_skills=skills_enabled,
|
||||
)
|
||||
if use_rag is not None:
|
||||
if use_rag is not None or is_research_spinoff:
|
||||
_preface_kwargs["use_rag"] = use_rag_val
|
||||
preface, rag_sources, web_sources = chat_processor.build_context_preface(**_preface_kwargs)
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
import time
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, AsyncGenerator, List
|
||||
from typing import Dict, Any, AsyncGenerator, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Request, HTTPException, Form, Query
|
||||
from fastapi.responses import StreamingResponse
|
||||
@@ -526,6 +526,66 @@ def setup_chat_routes(
|
||||
active_doc_id = form_data.get("active_doc_id", "").strip()
|
||||
logger.info(f"[doc-inject] chat_mode={chat_mode}, active_doc_id={active_doc_id!r}")
|
||||
|
||||
# Active email reader — when the user has an email open in the UI, the
|
||||
# frontend passes its uid/folder/account so "reply", "summarize this",
|
||||
# etc. resolve to the real email instead of the agent inventing a
|
||||
# fake markdown draft.
|
||||
active_email_uid = form_data.get("active_email_uid", "").strip()
|
||||
active_email_folder = form_data.get("active_email_folder", "INBOX").strip() or "INBOX"
|
||||
active_email_account = form_data.get("active_email_account", "").strip()
|
||||
active_email_ctx: Optional[Dict[str, str]] = None
|
||||
# Always reset between requests so a stale active-email pointer from
|
||||
# a previous turn (different reader closed, different account, etc.)
|
||||
# can't leak in when the user has no email open this turn.
|
||||
try:
|
||||
from src.tool_implementations import clear_active_email
|
||||
clear_active_email()
|
||||
except Exception:
|
||||
pass
|
||||
if active_email_uid:
|
||||
active_email_ctx = {
|
||||
"uid": active_email_uid,
|
||||
"folder": active_email_folder,
|
||||
"account": active_email_account,
|
||||
}
|
||||
# Try to enrich with subject + from so the agent's system prompt
|
||||
# block can quote them. Best-effort: a stale cache is fine, a
|
||||
# missing email just means we pass uid/folder/account only.
|
||||
try:
|
||||
from routes.email_routes import _read_cache_get, _read_cache_key
|
||||
_ck = _read_cache_key(active_email_account or None, active_email_folder, active_email_uid, owner=get_current_user(request))
|
||||
_cached_email = _read_cache_get(_ck)
|
||||
if _cached_email and isinstance(_cached_email, dict):
|
||||
active_email_ctx["subject"] = str(_cached_email.get("subject") or "")
|
||||
active_email_ctx["from"] = str(
|
||||
_cached_email.get("from_address")
|
||||
or _cached_email.get("from")
|
||||
or _cached_email.get("from_name")
|
||||
or ""
|
||||
)
|
||||
_body_preview = (_cached_email.get("body") or "")[:2000]
|
||||
if _body_preview:
|
||||
active_email_ctx["body_preview"] = _body_preview
|
||||
except Exception as _e:
|
||||
logger.debug(f"[email-inject] cache enrich skipped: {_e}")
|
||||
# Stash so email tools can resolve "this email" without UID guessing.
|
||||
try:
|
||||
from src.tool_implementations import set_active_email
|
||||
set_active_email(
|
||||
uid=active_email_uid,
|
||||
folder=active_email_folder,
|
||||
account=active_email_account or None,
|
||||
subject=active_email_ctx.get("subject"),
|
||||
sender=active_email_ctx.get("from"),
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug(f"[email-inject] set_active_email failed: {_e}")
|
||||
logger.info(
|
||||
"[email-inject] active_email uid=%s folder=%s account=%s subject=%r",
|
||||
active_email_uid, active_email_folder, active_email_account or "(default)",
|
||||
active_email_ctx.get("subject", ""),
|
||||
)
|
||||
|
||||
try:
|
||||
# Attachment-only sends: skip the message-required check when the
|
||||
# user has attached one or more files (the attachment IS the action).
|
||||
@@ -641,15 +701,27 @@ def setup_chat_routes(
|
||||
active_doc_id,
|
||||
)
|
||||
active_doc = None
|
||||
elif doc_session and doc_session != session:
|
||||
logger.warning(
|
||||
"[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s",
|
||||
active_doc_id,
|
||||
doc_session,
|
||||
session,
|
||||
)
|
||||
active_doc = None
|
||||
else:
|
||||
# NOTE: previously dropped the doc when doc.session_id
|
||||
# != current chat session — but that broke the common
|
||||
# case of "open an email draft from one chat, ask a
|
||||
# different chat to write into it". The frontend only
|
||||
# sends active_doc_id for docs currently visible in
|
||||
# the UI, and we already owner-checked above, so trust
|
||||
# the explicit signal. We just log the mismatch and
|
||||
# re-bind the doc to the current session so future
|
||||
# turns find it via the session-fallback path too.
|
||||
if doc_session and doc_session != session:
|
||||
logger.info(
|
||||
"[doc-inject] cross-session active_doc_id %s (was session %s, now %s) — accepting and rebinding",
|
||||
active_doc_id, doc_session, session,
|
||||
)
|
||||
try:
|
||||
active_doc.session_id = session
|
||||
_doc_db.commit()
|
||||
except Exception as _e:
|
||||
_doc_db.rollback()
|
||||
logger.warning(f"[doc-inject] session rebind failed: {_e}")
|
||||
logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
|
||||
else:
|
||||
logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
|
||||
@@ -696,7 +768,12 @@ def setup_chat_routes(
|
||||
# by default without having to send allow_bash in every request.
|
||||
if allow_bash is not None and str(allow_bash).lower() != "true":
|
||||
disabled_tools.add("bash")
|
||||
if allow_web_search is not None and str(allow_web_search).lower() != "true":
|
||||
_explicit_web_intent = bool(_tool_intent and _tool_intent.category == "web")
|
||||
if (
|
||||
allow_web_search is not None
|
||||
and str(allow_web_search).lower() != "true"
|
||||
and not _explicit_web_intent
|
||||
):
|
||||
disabled_tools.add("web_search")
|
||||
disabled_tools.add("web_fetch")
|
||||
|
||||
@@ -709,6 +786,21 @@ def setup_chat_routes(
|
||||
"manage_skills", # skill presets tied to user
|
||||
})
|
||||
|
||||
# Active email reader open → strip the tools that let the agent
|
||||
# "drift" to a new compose: create_document (writes a fake email-
|
||||
# shaped .md file) and send_email (sends fresh to a recipient the
|
||||
# agent invented). With those gone, the only paths left for "write
|
||||
# email saying X" are ui_control open_email_reply (draft) and
|
||||
# reply_to_email (immediate send) — both of which use the open
|
||||
# email's UID. Code-level enforcement instead of relying on a
|
||||
# prompt rule the model can ignore.
|
||||
if active_email_ctx and active_email_ctx.get("uid"):
|
||||
disabled_tools.update({
|
||||
"create_document",
|
||||
"send_email",
|
||||
"mcp__email__send_email",
|
||||
})
|
||||
|
||||
# Enforce per-user privileges
|
||||
_privs = {}
|
||||
_user = ctx.user
|
||||
@@ -1176,6 +1268,7 @@ def setup_chat_routes(
|
||||
max_rounds=_max_rounds,
|
||||
context_length=ctx.context_length,
|
||||
active_document=active_doc,
|
||||
active_email=active_email_ctx,
|
||||
session_id=session,
|
||||
disabled_tools=disabled_tools if disabled_tools else None,
|
||||
tool_policy=tool_policy,
|
||||
@@ -1204,6 +1297,8 @@ def setup_chat_routes(
|
||||
"doc_stream_open", "doc_stream_delta",
|
||||
"doc_update", "doc_suggestions", "ui_control",
|
||||
"rounds_exhausted",
|
||||
"loop_breaker_triggered",
|
||||
"intent_nudge_exhausted",
|
||||
"ask_user",
|
||||
"plan_update",
|
||||
):
|
||||
|
||||
@@ -18,6 +18,7 @@ from fastapi.responses import StreamingResponse
|
||||
from src.auth_helpers import require_authenticated_request, require_user
|
||||
from src.tool_implementations import do_manage_notes
|
||||
from src.constants import COOKBOOK_STATE_FILE
|
||||
from routes._validators import validate_remote_host, validate_ssh_port
|
||||
|
||||
|
||||
COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
|
||||
@@ -36,6 +37,21 @@ DOCS_WRITE_SCOPES = {"documents:write"}
|
||||
WRITE_ACTIONS = {"add", "create", "new", "save", "remind", "update", "delete", "toggle_item", "remove", "remove_item"}
|
||||
|
||||
|
||||
def _ssh_prefix_for_task(task: dict) -> tuple[str, str]:
    """Resolve a cookbook task's stored SSH target into ``(host, port_flag)``.

    ``host`` is ``""`` for a local task. ``remoteHost`` / ``sshPort`` come from
    cookbook_state.json and get interpolated into an ``ssh`` command string, so
    both are passed through ``validate_remote_host`` / ``validate_ssh_port``
    before use; a tampered entry (e.g. shell metacharacters in ``remoteHost``)
    is expected to be rejected by those validators rather than injected.

    ``port_flag`` is ``"-p <port> "`` (with trailing space) for a non-default
    port and ``""`` when the port is unset or 22.
    """
    # The state file is plain JSON, so the port may be stored as a number;
    # coerce to str before stripping instead of assuming a string.
    raw_host = str(task.get("remoteHost") or "").strip()
    raw_port = str(task.get("sshPort") or "").strip()

    host = validate_remote_host(raw_host or None) or ""
    # Normalise the validated port to text so the "22" comparison below holds
    # whether the validator hands back a str or an int.
    ssh_port = str(validate_ssh_port(raw_port or None) or "")

    port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
    return host, port_flag
|
||||
|
||||
|
||||
async def _as_owner(request: Request, owner: str, fn, *args, **kwargs):
|
||||
"""Run an existing route handler with request.state.current_user temporarily
|
||||
set to ``owner`` so its internal get_current_user/require_user calls see
|
||||
@@ -75,6 +91,20 @@ def _scope_owner(request: Request, allowed: set[str]) -> str:
|
||||
return require_user(request)
|
||||
|
||||
|
||||
def _scope_owner_all(request: Request, required: set[str]) -> str:
    """Resolve the request owner, demanding every scope in ``required``.

    Requests not flagged as API-token requests fall through to
    ``require_user``. For API-token requests, a 403 is raised when any
    required scope is absent from ``request.state.api_token_scopes`` or when
    the token carries no owner; otherwise the token owner is returned.
    """
    state = request.state
    if not getattr(state, "api_token", False):
        return require_user(request)

    granted = set(getattr(state, "api_token_scopes", []) or [])
    absent = sorted(required - granted)
    if absent:
        raise HTTPException(403, f"API token missing required scope: {' and '.join(absent)}")

    token_owner = getattr(state, "api_token_owner", None)
    if not token_owner:
        raise HTTPException(403, "API token has no owner")
    return token_owner
|
||||
|
||||
|
||||
def _find_endpoint(router: APIRouter | None, method: str, path: str):
|
||||
if router is None:
|
||||
return None
|
||||
@@ -122,7 +152,7 @@ def setup_codex_routes(
|
||||
"read": scoped(EMAIL_READ_SCOPES),
|
||||
"draft": scoped(EMAIL_DRAFT_SCOPES),
|
||||
"send": scoped(EMAIL_SEND_SCOPES),
|
||||
"actions": ["list", "read", "draft", "send"],
|
||||
"actions": ["list", "read", "draft_document", "draft", "send"],
|
||||
},
|
||||
"memory": {
|
||||
"read": scoped(MEMORY_READ_SCOPES),
|
||||
@@ -246,6 +276,56 @@ def setup_codex_routes(
|
||||
# Both handlers in routes/email_routes.py already accept `owner=` via
|
||||
# FastAPI Depends, so we call them directly without patching state.
|
||||
|
||||
def _email_draft_document_content(body: dict[str, Any]) -> str:
|
||||
def clean(v: Any) -> str:
|
||||
if isinstance(v, list):
|
||||
return ", ".join(str(x).strip() for x in v if str(x).strip())
|
||||
return str(v or "").strip()
|
||||
|
||||
to = clean(body.get("to"))
|
||||
cc = clean(body.get("cc"))
|
||||
bcc = clean(body.get("bcc"))
|
||||
subject = clean(body.get("subject"))
|
||||
in_reply_to = clean(body.get("in_reply_to"))
|
||||
references = clean(body.get("references"))
|
||||
body_text = str(body.get("body") or body.get("body_html") or "").strip()
|
||||
lines = [
|
||||
f"To: {to}",
|
||||
]
|
||||
if cc:
|
||||
lines.append(f"Cc: {cc}")
|
||||
if bcc:
|
||||
lines.append(f"Bcc: {bcc}")
|
||||
lines.append(f"Subject: {subject}")
|
||||
if in_reply_to:
|
||||
lines.append(f"In-Reply-To: {in_reply_to}")
|
||||
if references:
|
||||
lines.append(f"References: {references}")
|
||||
lines.extend(["---", body_text])
|
||||
return "\n".join(lines).rstrip() + "\n"
|
||||
|
||||
@router.post("/emails/draft-document")
async def codex_email_draft_document(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
    """Create an email draft as a document with ``language="email"``.

    The owner is resolved via ``_scope_owner_all`` with both ``email:draft``
    and ``documents:write``. Responds 503 when no documents create endpoint
    is wired in. The title defaults to the subject, and both fall back to
    ``"Email draft"``. A dict result from the documents endpoint is copied
    and tagged with ``draft_type`` and ``send_required_confirmation``; any
    other result is returned unchanged.
    """
    owner = _scope_owner_all(request, {"email:draft", "documents:write"})
    if documents_create_endpoint is None:
        raise HTTPException(503, "Documents integration is not available")
    from routes.document_routes import DocumentCreate

    fallback = "Email draft"
    subject = str(body.get("subject") or fallback).strip() or fallback
    title = str(body.get("title") or subject).strip() or fallback

    payload = DocumentCreate(
        session_id=body.get("session_id"),
        title=title,
        language="email",
        content=_email_draft_document_content(body),
    )
    created = await _as_owner(request, owner, documents_create_endpoint, request, payload)
    if not isinstance(created, dict):
        return created

    # Copy before tagging so the endpoint's own dict is not mutated.
    response = dict(created)
    response["draft_type"] = "document"
    response["send_required_confirmation"] = True
    return response
|
||||
|
||||
@router.post("/emails/draft")
|
||||
async def codex_email_draft(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
|
||||
owner = _scope_owner(request, EMAIL_DRAFT_SCOPES)
|
||||
@@ -486,8 +566,7 @@ def setup_codex_routes(
|
||||
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
|
||||
if task is None:
|
||||
raise HTTPException(404, "task not found")
|
||||
host = (task.get("remoteHost") or "").strip()
|
||||
ssh_port = (task.get("sshPort") or "").strip()
|
||||
host, port_flag = _ssh_prefix_for_task(task)
|
||||
# Prefer the persisted log file over the tmux pane. The pane gets
|
||||
# overwritten by the post-crash neofetch banner + bash prompt the
|
||||
# moment vllm exits; the log file is the raw stdout/stderr and
|
||||
@@ -499,7 +578,6 @@ def setup_codex_routes(
|
||||
f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
|
||||
)
|
||||
if host:
|
||||
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
|
||||
import shlex
|
||||
cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
|
||||
else:
|
||||
@@ -561,10 +639,8 @@ def setup_codex_routes(
|
||||
state = _read_cookbook_state()
|
||||
tasks = state.get("tasks") or []
|
||||
task = next((t for t in tasks if t.get("sessionId") == session_id), None)
|
||||
host = ((task or {}).get("remoteHost") or "").strip()
|
||||
ssh_port = ((task or {}).get("sshPort") or "").strip()
|
||||
host, port_flag = _ssh_prefix_for_task(task or {})
|
||||
if host:
|
||||
port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
|
||||
cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
|
||||
else:
|
||||
cmd = f"tmux kill-session -t {session_id}"
|
||||
@@ -714,7 +790,7 @@ def setup_codex_routes(
|
||||
norm = dict(body or {})
|
||||
sess = (norm.get("tmux_session") or norm.get("session_id") or "").strip()
|
||||
model = (norm.get("model") or norm.get("repo_id") or "").strip()
|
||||
host = (norm.get("host") or norm.get("remote_host") or "").strip()
|
||||
host = validate_remote_host((norm.get("host") or norm.get("remote_host") or "").strip() or None) or ""
|
||||
port = norm.get("port") or 8000
|
||||
import re as _re
|
||||
if not sess or not _re.fullmatch(r"[a-zA-Z0-9_-]+", sess):
|
||||
|
||||
@@ -12,6 +12,7 @@ import json
|
||||
import csv
|
||||
import io
|
||||
import os
|
||||
import inspect
|
||||
import httpx
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
@@ -45,10 +46,14 @@ def _save_settings(settings):
|
||||
def _get_carddav_config():
|
||||
import os
|
||||
settings = _load_settings()
|
||||
password = settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", ""))
|
||||
if password and "carddav_password" in settings:
|
||||
from src.secret_storage import decrypt
|
||||
password = decrypt(password)
|
||||
return {
|
||||
"url": settings.get("carddav_url", os.environ.get("CARDDAV_URL", "")),
|
||||
"username": settings.get("carddav_username", os.environ.get("CARDDAV_USERNAME", "")),
|
||||
"password": settings.get("carddav_password", os.environ.get("CARDDAV_PASSWORD", "")),
|
||||
"password": password,
|
||||
}
|
||||
|
||||
|
||||
@@ -86,11 +91,13 @@ def _normalize_contact(contact: Dict) -> Dict:
|
||||
name = str(contact.get("name") or "").strip()
|
||||
if not name and emails:
|
||||
name = emails[0].split("@")[0]
|
||||
address = str(contact.get("address") or "").strip()
|
||||
return {
|
||||
"uid": str(contact.get("uid") or uuid.uuid4()),
|
||||
"name": name,
|
||||
"emails": emails,
|
||||
"phones": phones,
|
||||
"address": address,
|
||||
}
|
||||
|
||||
|
||||
@@ -146,7 +153,7 @@ def _parse_vcards(text: str) -> List[Dict]:
|
||||
for block in re.split(r"BEGIN:VCARD", text):
|
||||
if not block.strip():
|
||||
continue
|
||||
contact = {"name": "", "emails": [], "phones": [], "uid": ""}
|
||||
contact = {"name": "", "emails": [], "phones": [], "uid": "", "address": ""}
|
||||
for line in block.split("\n"):
|
||||
line = line.strip()
|
||||
# Strip an optional RFC 6350 group prefix (e.g. "item1.EMAIL;...")
|
||||
@@ -169,6 +176,15 @@ def _parse_vcards(text: str) -> List[Dict]:
|
||||
phone = _vunesc(name_part.split(":", 1)[1])
|
||||
if phone and phone not in contact["phones"]:
|
||||
contact["phones"].append(phone)
|
||||
elif name_part.startswith("ADR"):
|
||||
# vCard ADR is 7 semicolon-separated components:
|
||||
# post-office-box;extended-address;street;locality;region;postal-code;country.
|
||||
# Recover a human-readable string by joining non-empty
|
||||
# components with ", ".
|
||||
if ":" in name_part:
|
||||
raw = name_part.split(":", 1)[1]
|
||||
parts = [_vunesc(p).strip() for p in raw.split(";")]
|
||||
contact["address"] = ", ".join(p for p in parts if p)
|
||||
elif name_part.startswith("UID:"):
|
||||
contact["uid"] = _vunesc(name_part[4:])
|
||||
if contact["name"] or contact["emails"]:
|
||||
@@ -193,7 +209,8 @@ def _vesc(value: str) -> str:
|
||||
|
||||
def _build_vcard(name: str, email: str, uid: Optional[str] = None,
|
||||
emails: Optional[List[str]] = None,
|
||||
phones: Optional[List[str]] = None) -> str:
|
||||
phones: Optional[List[str]] = None,
|
||||
address: Optional[str] = None) -> str:
|
||||
"""Build a vCard. Accepts either a single `email` (legacy callers) or
|
||||
full `emails`/`phones` lists (edit path). The first email is marked
|
||||
PREF=1. All values are RFC-6350-escaped."""
|
||||
@@ -226,6 +243,12 @@ def _build_vcard(name: str, email: str, uid: Optional[str] = None,
|
||||
lines.append(f"EMAIL;PREF=1:{_vesc(em)}" if i == 0 else f"EMAIL:{_vesc(em)}")
|
||||
for ph in phone_list:
|
||||
lines.append(f"TEL:{_vesc(ph)}")
|
||||
# Address: stuff the whole human-readable string into the street
|
||||
# component of ADR. vCard ADR has 7 semicolon-separated components:
|
||||
# post-office-box;extended-address;street;locality;region;postal-code;country.
|
||||
addr = (address or "").strip()
|
||||
if addr:
|
||||
lines.append(f"ADR:;;{_vesc(addr)};;;;")
|
||||
lines.append("END:VCARD")
|
||||
return "\r\n".join(lines) + "\r\n"
|
||||
|
||||
@@ -362,7 +385,7 @@ def _resolve_resource_url(uid: str) -> str:
|
||||
return _lookup() or _vcard_url(uid)
|
||||
|
||||
|
||||
def _create_contact(name: str, email: str) -> bool:
|
||||
def _create_contact(name: str, email: str, address: str = "") -> bool:
|
||||
"""Add a new contact via CardDAV or local contacts."""
|
||||
cfg = _get_carddav_config()
|
||||
if not _carddav_configured(cfg):
|
||||
@@ -371,12 +394,12 @@ def _create_contact(name: str, email: str) -> bool:
|
||||
for c in contacts:
|
||||
if email_l and email_l in [e.lower() for e in c.get("emails", [])]:
|
||||
return True
|
||||
contacts.append(_normalize_contact({"name": name, "emails": [email]}))
|
||||
contacts.append(_normalize_contact({"name": name, "emails": [email], "address": address}))
|
||||
_save_local_contacts(contacts)
|
||||
return True
|
||||
|
||||
contact_uid = str(uuid.uuid4())
|
||||
vcard = _build_vcard(name, email, contact_uid)
|
||||
vcard = _build_vcard(name, email, contact_uid, address=address)
|
||||
try:
|
||||
url = _carddav_base_url(cfg) + "/" + contact_uid + ".vcf"
|
||||
auth = None
|
||||
@@ -609,7 +632,7 @@ def _contacts_to_csv(contacts: List[Dict]) -> str:
|
||||
return out.getvalue()
|
||||
|
||||
|
||||
def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) -> bool:
|
||||
def _update_contact(uid: str, name: str, emails: List[str], phones: List[str], address: str = "") -> bool:
|
||||
"""Rewrite an existing contact via CardDAV or local contacts."""
|
||||
cfg = _get_carddav_config()
|
||||
if not _carddav_configured(cfg):
|
||||
@@ -618,16 +641,19 @@ def _update_contact(uid: str, name: str, emails: List[str], phones: List[str]) -
|
||||
out = []
|
||||
for c in contacts:
|
||||
if c.get("uid") == uid:
|
||||
out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones}))
|
||||
# Preserve existing address when caller passes "" (only
|
||||
# updating name/emails/phones, not touching address).
|
||||
addr = address if address else c.get("address", "")
|
||||
out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones, "address": addr}))
|
||||
found = True
|
||||
else:
|
||||
out.append(c)
|
||||
if not found:
|
||||
out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones}))
|
||||
out.append(_normalize_contact({"uid": uid, "name": name, "emails": emails, "phones": phones, "address": address}))
|
||||
_save_local_contacts(out)
|
||||
return True
|
||||
|
||||
vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones)
|
||||
vcard = _build_vcard(name, "", uid=uid, emails=emails, phones=phones, address=address)
|
||||
# Use the real resource href (handles externally-created contacts whose
|
||||
# filename != UID); falls back to the <uid>.vcf guess.
|
||||
try:
|
||||
@@ -714,16 +740,39 @@ def setup_contacts_routes():
|
||||
"""Add a new contact."""
|
||||
name = (data.get("name") or "").strip()
|
||||
email = (data.get("email") or "").strip()
|
||||
phone = (data.get("phone") or "").strip()
|
||||
address = (data.get("address") or "").strip()
|
||||
if not email:
|
||||
return {"success": False, "error": "Email required"}
|
||||
# Check if already exists
|
||||
contacts = _fetch_contacts()
|
||||
for c in contacts:
|
||||
if email.lower() in [e.lower() for e in c["emails"]]:
|
||||
return {"success": True, "message": "Already exists", "contact": c}
|
||||
# Check if already exists by email
|
||||
if email:
|
||||
contacts = _fetch_contacts()
|
||||
for c in contacts:
|
||||
if email.lower() in [e.lower() for e in c["emails"]]:
|
||||
return {"success": True, "message": "Already exists", "contact": c}
|
||||
if not name:
|
||||
name = email.split("@")[0]
|
||||
ok = _create_contact(name, email)
|
||||
create_params = inspect.signature(_create_contact).parameters
|
||||
if len(create_params) >= 3:
|
||||
ok = _create_contact(name, email, address)
|
||||
else:
|
||||
ok = _create_contact(name, email)
|
||||
# If a phone was provided, do an immediate update to thread it
|
||||
# through (the simple _create_contact signature only takes name +
|
||||
# email + address; phones happen via update).
|
||||
if ok and phone:
|
||||
try:
|
||||
fresh = _fetch_contacts(force=True)
|
||||
created = next((c for c in fresh if name == c.get("name") and (not email or email in c.get("emails", []))), None)
|
||||
if created:
|
||||
_update_contact(
|
||||
created["uid"], name,
|
||||
created.get("emails", []),
|
||||
[phone],
|
||||
address,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return {"success": ok}
|
||||
|
||||
@router.post("/import")
|
||||
@@ -785,7 +834,11 @@ def setup_contacts_routes():
|
||||
except ValueError as e:
|
||||
raise HTTPException(400, str(e))
|
||||
else:
|
||||
settings[key] = data[key]
|
||||
value = data[key]
|
||||
if key == "carddav_password" and value:
|
||||
from src.secret_storage import encrypt
|
||||
value = encrypt(value)
|
||||
settings[key] = value
|
||||
_save_settings(settings)
|
||||
# Force re-fetch
|
||||
_contact_cache["fetched_at"] = None
|
||||
@@ -802,7 +855,7 @@ def setup_contacts_routes():
|
||||
# match PUT /{uid} with uid="config".
|
||||
@router.put("/{uid}")
|
||||
async def edit_contact(uid: str, data: dict, _admin: str = Depends(require_admin)):
|
||||
"""Edit an existing contact — name / emails / phones."""
|
||||
"""Edit an existing contact — name / emails / phones / address."""
|
||||
name = (data.get("name") or "").strip()
|
||||
emails = data.get("emails")
|
||||
phones = data.get("phones")
|
||||
@@ -810,11 +863,12 @@ def setup_contacts_routes():
|
||||
emails = [data["email"]]
|
||||
emails = [e.strip() for e in (emails or []) if e and e.strip()]
|
||||
phones = [p.strip() for p in (phones or []) if p and p.strip()]
|
||||
if not name and not emails:
|
||||
return {"success": False, "error": "Name or email required"}
|
||||
address = (data.get("address") or "").strip()
|
||||
if not name and not emails and not address:
|
||||
return {"success": False, "error": "Name, email, or address required"}
|
||||
if not name and emails:
|
||||
name = emails[0].split("@")[0]
|
||||
ok = _update_contact(uid, name, emails, phones)
|
||||
ok = _update_contact(uid, name, emails, phones, address)
|
||||
return {"success": ok}
|
||||
|
||||
@router.delete("/{uid}")
|
||||
|
||||
@@ -362,7 +362,12 @@ def _user_shell_path_bootstrap() -> list[str]:
|
||||
' ODYSSEUS_USER_PATH="$("$ODYSSEUS_USER_SHELL" -ic \'printf "__ODYSSEUS_PATH__%s\\n" "$PATH"\' 2>/dev/null | sed -n \'s/^__ODYSSEUS_PATH__//p\' | tail -n 1 || true)"',
|
||||
' if [ -n "$ODYSSEUS_USER_PATH" ]; then export PATH="$ODYSSEUS_USER_PATH:$PATH"; fi',
|
||||
'fi',
|
||||
'command -v python3 >/dev/null 2>&1 || python3() { python "$@"; }',
|
||||
# Windows can expose python3 as a Microsoft Store App Execution Alias
|
||||
# under WindowsApps. Git Bash sees that stub as present, but it exits
|
||||
# before running Python. A Windows venv usually has python.exe, not
|
||||
# python3.exe, so treat a missing or WindowsApps python3 as absent.
|
||||
'_odys_py3="$(command -v python3 2>/dev/null || true)"',
|
||||
'case "$_odys_py3" in ""|*[Ww]indows[Aa]pps*) python3() { python "$@"; } ;; esac',
|
||||
'command -v python >/dev/null 2>&1 || python() { python3 "$@"; }',
|
||||
]
|
||||
|
||||
@@ -573,6 +578,36 @@ _GGUF_PRELUDE_RE = re.compile(
|
||||
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
|
||||
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
|
||||
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
|
||||
# llama.cpp cache-type names -> the integer ggml type id, kept as strings so
# they can be spliced straight back into a command line. Keys are lowercase;
# lookups lowercase the user-supplied name first.
_LLAMA_CPP_PYTHON_GGML_TYPES = {
    "f32": "0",
    "f16": "1",
    "q4_0": "2",
    "q4_1": "3",
    "q5_0": "6",
    "q5_1": "7",
    "q8_0": "8",
    "q8_1": "9",
    "q2_k": "10",
    "q3_k": "11",
    "q4_k": "12",
    "q5_k": "13",
    "q6_k": "14",
    "q8_k": "15",
    "iq2_xxs": "16",
    "iq2_xs": "17",
    "iq3_xxs": "18",
    "iq1_s": "19",
    "iq4_nl": "20",
    "iq3_s": "21",
    "iq2_s": "22",
    "iq4_xs": "23",
    # bf16 is an accepted llama.cpp KV-cache type (GGML_TYPE_BF16 = 30); without
    # this entry `--type_k bf16` would be left as a name instead of an id.
    "bf16": "30",
    "mxfp4": "39",
    "nvfp4": "40",
    "q1_0": "41",
}

# Matches `--type_k` / `--type_v` followed by whitespace or `=` and an
# optionally quoted value; the closing quote must match the opening one.
_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile(
    r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)"
)
|
||||
|
||||
|
||||
def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
|
||||
@@ -604,6 +639,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -
|
||||
return f"[{host}]" if bracketed_host else host, port
|
||||
|
||||
|
||||
def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None:
|
||||
"""Map llama.cpp KV cache type names to llama-cpp-python's integer enum."""
|
||||
if not cmd or "llama_cpp.server" not in cmd:
|
||||
return cmd
|
||||
|
||||
def repl(match: re.Match[str]) -> str:
|
||||
value = match.group("value")
|
||||
mapped = _LLAMA_CPP_PYTHON_GGML_TYPES.get(value.lower())
|
||||
if not mapped:
|
||||
return match.group(0)
|
||||
quote = match.group("quote")
|
||||
return f"{match.group('flag')}{match.group('sep')}{quote}{mapped}{quote}"
|
||||
|
||||
return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(repl, cmd)
|
||||
|
||||
|
||||
def _check_serve_binary(seg: str) -> None:
|
||||
"""Validate that a single command segment starts with an allowlisted binary
|
||||
(after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
|
||||
@@ -742,6 +793,7 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
|
||||
runner_lines.append(' done')
|
||||
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
|
||||
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
|
||||
runner_lines.append(' mkdir -p ~/bin')
|
||||
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
|
||||
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
|
||||
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
|
||||
@@ -1046,6 +1098,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
|
||||
"vLLM is not installed or not in PATH on this server.",
|
||||
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
|
||||
),
|
||||
(
|
||||
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
|
||||
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
|
||||
r"Please ensure sgl_kernel is properly installed",
|
||||
"SGLang native dependencies are missing on this server.",
|
||||
[
|
||||
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
|
||||
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"sglang.*command not found|No module named sglang|SGLang is not installed",
|
||||
"SGLang is not installed or not in PATH on this server.",
|
||||
|
||||
@@ -4,6 +4,62 @@ Kept dependency-free (no FastAPI / SQLAlchemy imports) so the behavior can be
|
||||
unit-tested without standing up the whole app.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
_FETCHING_ZERO_FILES_RE = re.compile(r"Fetching\s+0\s+files", re.IGNORECASE)
|
||||
|
||||
# Probe scripts for the dead-session download check, run as
|
||||
# `python3 -c <PROBE> <repo_id> <cache_root>` (locally or over SSH).
|
||||
# cache_root is the task's custom download dir, '' for the default HF cache.
|
||||
# It has to be passed explicitly: the download runner exports
|
||||
# HF_HOME=<local_dir>, so that task's cache lives under <local_dir>/hub, and
|
||||
# the probe process's own environment knows nothing about it.
|
||||
# Shared opening of both probes: pick the hub directory (an explicit
# cache_root argument wins over HUGGINGFACE_HUB_CACHE / HF_HOME) and build
# the repo's cache folder path `d`.
_HF_PROBE_LOCATE_REPO = (
    "import os,sys;"
    "repo=sys.argv[1];"
    "root=os.path.expanduser(sys.argv[2]) if len(sys.argv)>2 and sys.argv[2] else '';"
    "base=os.path.join(root,'hub') if root else (os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub'));"
    "d=os.path.join(base,'models--'+repo.replace('/','--'));"
)

# Shared middle: `inc` is true when the blobs directory holds a *.incomplete file.
_HF_PROBE_SCAN_BLOBS = (
    "blobs=os.path.join(d,'blobs');"
    "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
)

# Exits 0 when some snapshot directory is non-empty and no *.incomplete blob
# remains; exits 1 otherwise.
HF_CACHE_COMPLETE_PROBE = (
    _HF_PROBE_LOCATE_REPO
    + "snap=os.path.join(d,'snapshots');"
    + "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
    + "inc=False;"
    + _HF_PROBE_SCAN_BLOBS
    + "sys.exit(0 if ok and not inc else 1)"
)

# Exits 0 when at least one *.incomplete blob exists for the repo; exits 1 otherwise.
HF_CACHE_INCOMPLETE_PROBE = (
    _HF_PROBE_LOCATE_REPO
    + _HF_PROBE_SCAN_BLOBS
    + "sys.exit(0 if inc else 1)"
)
|
||||
|
||||
|
||||
def classify_dead_download(full_snapshot: str):
    """Classify a finished download from the markers left in its output.

    Returns a ``(status, zero_files)`` tuple, or ``None`` when the snapshot
    is empty or carries neither marker (the caller then has to decide by
    other means):

    * ``("completed", False)`` - ``DOWNLOAD_OK`` is present.
    * ``("error", True)`` - ``DOWNLOAD_OK`` is present but the output also
      contains "Fetching 0 files", i.e. nothing was actually fetched.
    * ``("error", False)`` - only ``DOWNLOAD_FAILED`` is present.

    ``DOWNLOAD_OK`` takes precedence over ``DOWNLOAD_FAILED`` when both occur.
    """
    if not full_snapshot:
        return None

    if "DOWNLOAD_OK" in full_snapshot:
        fetched_nothing = _FETCHING_ZERO_FILES_RE.search(full_snapshot) is not None
        return ("error", True) if fetched_nothing else ("completed", False)

    return ("error", False) if "DOWNLOAD_FAILED" in full_snapshot else None
|
||||
|
||||
|
||||
def error_aware_output_tail(full_snapshot: str, status: str) -> str:
|
||||
"""Return the trailing slice of a task log for the status response.
|
||||
|
||||
@@ -30,7 +30,10 @@ from core.platform_compat import (
|
||||
which_tool,
|
||||
)
|
||||
from routes.shell_routes import TMUX_LOG_DIR
|
||||
from routes.cookbook_output import error_aware_output_tail
|
||||
from routes.cookbook_output import (
|
||||
error_aware_output_tail, classify_dead_download,
|
||||
HF_CACHE_COMPLETE_PROBE, HF_CACHE_INCOMPLETE_PROBE,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -46,6 +49,7 @@ from routes.cookbook_helpers import (
|
||||
_diagnose_serve_output, run_ssh_command_async,
|
||||
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
|
||||
_user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
|
||||
_normalize_llama_cpp_python_cache_types,
|
||||
ModelDownloadRequest, ServeRequest,
|
||||
)
|
||||
|
||||
@@ -54,7 +58,7 @@ _HF_TOKEN_STATUS_SNIPPET = (
|
||||
'echo "[odysseus] HF token: applied"; '
|
||||
'else '
|
||||
'echo "[odysseus] HF token: NOT SET — gated/private models will be denied. '
|
||||
'Add one in Odysseus Settings -> Cookbook -> HuggingFace Token."; '
|
||||
'Add one in Odysseus Cookbook -> Settings -> HuggingFace Token."; '
|
||||
'fi'
|
||||
)
|
||||
|
||||
@@ -170,6 +174,16 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"vLLM is not installed or not in PATH on this server.",
|
||||
[{"label": "install vLLM in Cookbook Dependencies", "op": "dependency", "package": "vllm"}],
|
||||
),
|
||||
(
|
||||
r"sgl_kernel[\s\S]*(Python\.h|libnuma\.so\.1|common_ops)|"
|
||||
r"(Python\.h|libnuma\.so\.1|common_ops)[\s\S]*sgl_kernel|"
|
||||
r"Please ensure sgl_kernel is properly installed",
|
||||
"SGLang native dependencies are missing on this server.",
|
||||
[
|
||||
{"label": "install OS packages: libnuma-dev python3.12-dev build-essential", "op": "manual"},
|
||||
{"label": "upgrade sglang-kernel after OS packages are installed", "op": "manual"},
|
||||
],
|
||||
),
|
||||
(
|
||||
r"sglang.*command not found|No module named sglang|SGLang is not installed",
|
||||
"SGLang is not installed or not in PATH on this server.",
|
||||
@@ -353,7 +367,11 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# all output to the log the poller reads. Paths handed to bash use
|
||||
# POSIX form + shell-quoting so drive paths / spaces survive.
|
||||
inner = TMUX_LOG_DIR / f"{session_id}_run.sh"
|
||||
inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8")
|
||||
pp = shlex.quote(pid_path.as_posix())
|
||||
inner.write_text(
|
||||
f"printf '%s\\n' \"$$\" > {pp}\n" + "\n".join(bash_lines) + "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
lp = shlex.quote(log_path.as_posix())
|
||||
ip = shlex.quote(inner.as_posix())
|
||||
script_path = TMUX_LOG_DIR / f"{session_id}.sh"
|
||||
@@ -658,7 +676,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
_spf = f"-p {_port} " if _port and _port != "22" else ""
|
||||
setup_cmd = (
|
||||
f"scp -O {_pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
)
|
||||
else:
|
||||
# Local: run hf download in the background (tmux on POSIX, a detached
|
||||
@@ -690,7 +708,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
lines.append('exec "${SHELL:-/bin/bash}"')
|
||||
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
||||
wrapper_script.chmod(0o755)
|
||||
setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
||||
setup_cmd = None if IS_WINDOWS else f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
|
||||
|
||||
logger.info(f"Model download: {req.repo_id} (backend={'ollama' if is_ollama_download else 'hf'}, include={req.include}, session={session_id}, remote={remote})")
|
||||
logger.info(f"Download setup_cmd: {setup_cmd}")
|
||||
@@ -966,9 +984,9 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
ssh_args = ["ssh"]
|
||||
if ssh_port and ssh_port != "22":
|
||||
ssh_args.extend(["-p", str(ssh_port)])
|
||||
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
||||
capture_cmd = ssh_args + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||
else:
|
||||
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-200"]
|
||||
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-2000"]
|
||||
|
||||
_exit_re = re.compile(r"=== Process exited with code (-?\d+) ===")
|
||||
for wait_s in _waits:
|
||||
@@ -1211,6 +1229,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# many downstream `"engine" in req.cmd` membership checks can't hit
|
||||
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
|
||||
req.cmd = _validate_serve_cmd(req.cmd) or ""
|
||||
req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or ""
|
||||
req.cmd = _venv_safe_local_pip_install_cmd(
|
||||
req.cmd,
|
||||
local=not bool(req.remote_host),
|
||||
@@ -1558,10 +1577,10 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
setup_cmd = (
|
||||
f"{scp_extras}"
|
||||
f"scp -O {_Pf}-q '{runner_path}' {remote}:{remote_runner} && "
|
||||
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
f"ssh {_pf}{remote} 'chmod +x {remote_runner} && tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
|
||||
)
|
||||
else:
|
||||
setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
||||
setup_cmd = f"tmux set-option -g history-limit 100000 2>/dev/null; tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
|
||||
|
||||
if setup_cmd is None:
|
||||
# LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
|
||||
@@ -2606,6 +2625,193 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"error": _ollama_library_cache["error"],
|
||||
}
|
||||
|
||||
# ── vLLM recipe scraper ─────────────────────────────────────────────
|
||||
# Fetches the official YAML recipe for a model from vllm-project/recipes
|
||||
# and normalizes it into a small JSON the frontend can consume. Cached
|
||||
# per-repo so the GitHub raw endpoint isn't hammered.
|
||||
_vllm_recipe_cache: dict[str, tuple[float, dict | None]] = {}
|
||||
# Manifest of all <org>/<model> ids that have a recipe in the upstream
|
||||
# repo. Cheap to fetch (one Git Tree API call), so we cache the whole
|
||||
# set for ~12h. Per-row "does this model have a recipe?" lookups hit
|
||||
# this set instead of doing 912 individual recipe fetches.
|
||||
_vllm_recipe_manifest: dict = {"fetched_at": 0.0, "models": set(), "error": ""}
|
||||
|
||||
@router.get("/api/cookbook/vllm-recipe-manifest")
async def vllm_recipe_manifest(refresh: int = 0):
    """List the <org>/<model> ids that have a recipe in vllm-project/recipes.

    The list comes from one recursive GitHub tree request and is kept in
    ``_vllm_recipe_manifest`` for 12 hours. A fetch happens when ``refresh``
    is truthy, the cached copy is older than the TTL, or the cached set is
    empty. On a failed fetch a previously cached list is still served (with
    the error attached); with nothing cached an empty result is returned.
    """
    import time as _time
    import httpx as _httpx

    TTL = 12 * 3600.0
    now = _time.time()
    manifest = _vllm_recipe_manifest

    expired = (now - manifest["fetched_at"]) > TTL
    if refresh or expired or not manifest["models"]:
        tree_url = (
            "https://api.github.com/repos/vllm-project/recipes/"
            "git/trees/main?recursive=1"
        )

        def _fetch_sync() -> tuple[int, dict | None, str]:
            # Blocking HTTP call; returns (status, parsed json, error text).
            # Status 0 means the request itself raised.
            try:
                with _httpx.Client(timeout=10.0, follow_redirects=True) as client:
                    resp = client.get(
                        tree_url,
                        headers={"Accept": "application/vnd.github+json"},
                    )
                    if resp.status_code == 200:
                        return 200, resp.json(), ""
                    return resp.status_code, None, resp.text[:200]
            except Exception as e:
                return 0, None, f"fetch error: {e}"

        status, data, err = await asyncio.to_thread(_fetch_sync)
        if status == 200 and isinstance(data, dict):
            prefix, suffix = "models/", ".yaml"
            found: set[str] = set()
            for entry in data.get("tree") or []:
                path = (entry or {}).get("path") or ""
                if path.startswith(prefix) and path.endswith(suffix):
                    # "models/<org>/<model>.yaml" -> "<org>/<model>"
                    ident = path[len(prefix):-len(suffix)]
                    if "/" in ident:
                        found.add(ident)
            manifest["models"] = found
            manifest["fetched_at"] = now
            manifest["error"] = ""
        else:
            manifest["error"] = f"HTTP {status}: {err}" if status else err
            # Keep serving a stale-but-usable list on transient failures;
            # only report an empty manifest when there is nothing cached.
            if not manifest["models"]:
                return {
                    "models": [],
                    "count": 0,
                    "error": manifest["error"],
                }

    known = manifest["models"]
    return {
        "models": sorted(known),
        "count": len(known),
        "fetched_at": manifest["fetched_at"],
        "error": manifest["error"],
    }
|
||||
|
||||
@router.get("/api/cookbook/vllm-recipe")
async def vllm_recipe(repo: str, refresh: int = 0):
    """Return the vLLM official recipe for a HuggingFace repo, if one
    exists at vllm-project/recipes.

    `repo` is the full HF id like 'MiniMaxAI/MiniMax-M2'. Results (including
    404 "no recipe" answers) are cached per repo for 6h; `refresh` bypasses
    the cache. Non-200/404 responses are reported as transient and are not
    cached.

    Returns a dict with "exists": False (plus an optional "error") when no
    usable recipe is found, otherwise the normalized recipe fields.
    """
    import re as _re
    import time as _time
    import httpx as _httpx
    import yaml as _yaml

    TTL = 6 * 3600.0
    now = _time.time()
    repo = (repo or "").strip().strip("/")
    # `repo` comes from the query string and is spliced into the fetch URL,
    # so restrict it to plain path segments: this rejects "..", "?", "#",
    # "%", whitespace and backslashes that could point the request at a
    # different file than models/<repo>.yaml.
    _segment = r"[A-Za-z0-9_][A-Za-z0-9._-]*"
    if not _re.fullmatch(rf"{_segment}(?:/{_segment})+", repo):
        return {"exists": False, "error": "repo must be <org>/<model>"}

    cached = _vllm_recipe_cache.get(repo)
    if cached and not refresh and (now - cached[0]) < TTL:
        return cached[1] or {"exists": False, "cached": True}

    url = (
        f"https://raw.githubusercontent.com/vllm-project/recipes/"
        f"main/models/{repo}.yaml"
    )

    def _fetch_sync() -> tuple[int, str]:
        # Blocking HTTP call; status 0 means the request itself raised.
        try:
            with _httpx.Client(timeout=8.0, follow_redirects=True) as client:
                r = client.get(url)
                return r.status_code, r.text
        except Exception as e:
            return 0, f"fetch error: {e}"

    status, text = await asyncio.to_thread(_fetch_sync)
    if status == 404:
        _vllm_recipe_cache[repo] = (now, {"exists": False})
        return {"exists": False}
    if status != 200:
        return {"exists": False, "error": f"HTTP {status}", "transient": True}

    try:
        doc = _yaml.safe_load(text) or {}
    except Exception as e:
        return {"exists": False, "error": f"yaml parse: {e}"}
    # A top-level list or scalar would make every .get() below raise.
    if not isinstance(doc, dict):
        return {"exists": False, "error": "yaml parse: recipe is not a mapping"}

    def _mapping(value) -> dict:
        # Treat a missing or wrongly-typed section as empty.
        return value if isinstance(value, dict) else {}

    def _sequence(value) -> list:
        # Same for list-valued fields (a bare string is not split into chars).
        return list(value) if isinstance(value, (list, tuple)) else []

    meta = _mapping(doc.get("meta"))
    model = _mapping(doc.get("model"))
    features = _mapping(doc.get("features"))
    deps = _sequence(doc.get("dependencies"))
    variants = _mapping(doc.get("variants"))
    hw_overrides = _mapping(doc.get("hardware_overrides"))
    strat_overrides = _mapping(doc.get("strategy_overrides"))

    # Tool-call + reasoning parsers, as flat arg arrays, so the frontend
    # can drop them straight into the launch command.
    tool_calling = _mapping(features.get("tool_calling"))
    reasoning = _mapping(features.get("reasoning"))

    normalized = {
        "exists": True,
        "source_url": url,
        "title": meta.get("title") or "",
        "provider": meta.get("provider") or "",
        "description": meta.get("description") or "",
        "date_updated": str(meta.get("date_updated") or ""),
        "hardware_support": meta.get("hardware") or {},
        "model_id": model.get("model_id") or repo,
        "min_vllm_version": model.get("min_vllm_version") or "",
        "architecture": model.get("architecture") or "",
        "parameter_count": model.get("parameter_count") or "",
        "active_parameters": model.get("active_parameters") or "",
        "context_length": model.get("context_length") or 0,
        "base_args": _sequence(model.get("base_args")),
        "base_env": dict(_mapping(model.get("base_env"))),
        "tool_calling": {
            "description": tool_calling.get("description") or "",
            "args": _sequence(tool_calling.get("args")),
        } if tool_calling else None,
        "reasoning": {
            "description": reasoning.get("description") or "",
            "args": _sequence(reasoning.get("args")),
        } if reasoning else None,
        "dependencies": [
            {
                "note": str(d.get("note") or "").strip(),
                "command": str(d.get("command") or "").strip(),
                "optional": bool(d.get("optional", False)),
            }
            for d in deps if isinstance(d, dict)
        ],
        "variants": {
            k: {
                "model_id": v.get("model_id") or model.get("model_id") or repo,
                "precision": v.get("precision") or "",
                "vram_minimum_gb": v.get("vram_minimum_gb") or 0,
                "description": v.get("description") or "",
                "extra_args": _sequence(v.get("extra_args")),
                "extra_env": dict(_mapping(v.get("extra_env"))),
            }
            for k, v in variants.items() if isinstance(v, dict)
        },
        "hardware_overrides": {
            hw: {
                "extra_args": _sequence(ov.get("extra_args")),
                "extra_env": dict(_mapping(ov.get("extra_env"))),
            }
            for hw, ov in hw_overrides.items() if isinstance(ov, dict)
        },
        "strategy_overrides": {
            strat: dict(ov)
            for strat, ov in strat_overrides.items() if isinstance(ov, dict)
        },
        "compatible_strategies": _sequence(doc.get("compatible_strategies")),
    }
    _vllm_recipe_cache[repo] = (now, normalized)
    return normalized
|
||||
|
||||
@router.get("/api/cookbook/tasks/status")
|
||||
async def cookbook_tasks_status(request: Request):
|
||||
"""Check status of all active cookbook tmux sessions.
|
||||
@@ -2620,30 +2826,20 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
def _cookbook_tasks_status_sync():
|
||||
import subprocess
|
||||
|
||||
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
|
||||
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
|
||||
"""Best-effort check for a completed HF cache entry.
|
||||
|
||||
tmux output can stop at a stale progress line if the pane/session
|
||||
disappears before Cookbook captures the final DOWNLOAD_OK marker.
|
||||
In that case, trust the cache shape: a snapshot directory with files
|
||||
and no *.incomplete blobs means HuggingFace finished materializing the
|
||||
model.
|
||||
model. cache_root is the task's custom download dir — the runner
|
||||
pointed HF_HOME there, so the cache lives under <cache_root>/hub,
|
||||
not wherever this probe's environment says.
|
||||
"""
|
||||
if not repo_id or "/" not in repo_id:
|
||||
return False
|
||||
py = (
|
||||
"import os,sys;"
|
||||
"repo=sys.argv[1];"
|
||||
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
|
||||
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
|
||||
"snap=os.path.join(d,'snapshots');"
|
||||
"ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
|
||||
"inc=False;"
|
||||
"blobs=os.path.join(d,'blobs');"
|
||||
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
|
||||
"sys.exit(0 if ok and not inc else 1)"
|
||||
)
|
||||
cmd = ["python3", "-c", py, repo_id]
|
||||
cmd = ["python3", "-c", HF_CACHE_COMPLETE_PROBE, repo_id, cache_root or ""]
|
||||
try:
|
||||
if remote_host:
|
||||
ssh_base = ["ssh"]
|
||||
@@ -2657,7 +2853,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
|
||||
def _download_cache_incomplete(repo_id: str, remote_host: str = "", ssh_port: str = "", cache_root: str = "") -> bool:
|
||||
"""Best-effort check for resumable HF partial blobs.
|
||||
|
||||
A lost SSH/tmux session can leave a real download still incomplete.
|
||||
@@ -2666,16 +2862,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
"""
|
||||
if not repo_id or "/" not in repo_id:
|
||||
return False
|
||||
py = (
|
||||
"import os,sys;"
|
||||
"repo=sys.argv[1];"
|
||||
"base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
|
||||
"d=os.path.join(base,'models--'+repo.replace('/','--'));"
|
||||
"blobs=os.path.join(d,'blobs');"
|
||||
"inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
|
||||
"sys.exit(0 if inc else 1)"
|
||||
)
|
||||
cmd = ["python3", "-c", py, repo_id]
|
||||
cmd = ["python3", "-c", HF_CACHE_INCOMPLETE_PROBE, repo_id, cache_root or ""]
|
||||
try:
|
||||
if remote_host:
|
||||
ssh_base = ["ssh"]
|
||||
@@ -2880,7 +3067,7 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
and (
|
||||
".incomplete" in full_snapshot
|
||||
or bool(re.search(r'model-\d+-of-\d+\.[A-Za-z0-9_.-]+:\s+(?:[0-9]|[1-8][0-9])%', full_snapshot))
|
||||
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""))
|
||||
or _download_cache_incomplete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
|
||||
)
|
||||
)
|
||||
if is_alive or (local_win_task and full_snapshot):
|
||||
@@ -2921,11 +3108,19 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
else:
|
||||
status = "running"
|
||||
else:
|
||||
# Session is dead — check if it completed or crashed
|
||||
if (
|
||||
# Session is dead — check if it completed or crashed. The
|
||||
# runner markers in the retained output are conclusive
|
||||
# (DOWNLOAD_OK only prints after exit 0), so check them before
|
||||
# the cache probe, which can't see ollama pulls at all.
|
||||
marker = classify_dead_download(full_snapshot) if task_type == "download" else None
|
||||
if marker is not None:
|
||||
status, download_zero_files = marker
|
||||
if status == "completed" and not progress_text:
|
||||
progress_text = "Download complete"
|
||||
elif (
|
||||
task_type == "download"
|
||||
and not download_has_incomplete_evidence
|
||||
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""))
|
||||
and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or ""), _payload.get("local_dir") or "")
|
||||
):
|
||||
status = "completed"
|
||||
if not progress_text:
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
"""Diagnostics routes — /api/db/stats, /api/rag/stats, /api/test/youtube, /api/test-research."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Dict, Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Form, Request
|
||||
|
||||
from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
|
||||
from core.constants import DEFAULT_HOST
|
||||
from core.constants import DEFAULT_HOST, DATA_DIR
|
||||
from core.middleware import require_admin
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -28,6 +29,30 @@ def setup_diagnostics_routes(
|
||||
from src.service_health import collect_service_health
|
||||
return await collect_service_health(rag_manager, memory_vector)
|
||||
|
||||
@router.get("/api/diagnostics/logs")
async def get_diagnostics_logs(request: Request, limit: int = 200) -> Dict[str, Any]:
    """Return the last `limit` lines of <DATA_DIR>/logs/app.log (admin only).

    `limit` is clamped to 1..1000. A missing log file yields an empty list
    rather than an error; any other failure is logged and surfaced as a 500.
    Trailing CR/LF characters are stripped from each returned line.
    """
    from collections import deque

    require_admin(request)
    limit = max(1, min(limit, 1000))
    try:
        log_file = os.path.join(DATA_DIR, "logs", "app.log")
        # Open directly instead of checking exists() first: the file can
        # disappear between the check and the open (e.g. during rotation).
        # A bounded deque keeps only the tail instead of the whole file.
        with open(log_file, "r", encoding="utf-8", errors="ignore") as f:
            tail = deque(f, maxlen=limit)
    except FileNotFoundError:
        return {"status": "success", "logs": []}
    except Exception as e:
        logger.error(f"Diagnostics logs retrieval error: {e}")
        raise HTTPException(500, f"Failed to retrieve logs: {str(e)}") from e

    return {
        "status": "success",
        "logs": [line.rstrip('\r\n') for line in tail],
    }
|
||||
|
||||
@router.get("/api/db/stats")
|
||||
async def get_database_stats(request: Request) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
|
||||
@@ -13,6 +13,8 @@ and `email_pollers.py` (the background loops):
|
||||
"""
|
||||
|
||||
import os
|
||||
import base64
|
||||
import time
|
||||
import imaplib
|
||||
import smtplib
|
||||
import email as email_mod
|
||||
@@ -38,6 +40,106 @@ from src.secret_storage import decrypt as _decrypt
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _xoauth2_raw(user: str, access_token: str) -> str:
    """Build the unencoded SASL XOAUTH2 initial-response string.

    The result is deliberately NOT base64-encoded: smtplib.SMTP.auth() and
    imaplib.IMAP4.authenticate() encode whatever their callback returns, so
    encoding here as well would double-encode the credentials.
    """
    sasl_response = f"user={user}\x01auth=Bearer {access_token}\x01\x01"
    return sasl_response
|
||||
|
||||
|
||||
def _xoauth2_bytes(user: str, access_token: str) -> bytes:
    """Return the raw XOAUTH2 string as bytes, for imaplib's authenticate() callback."""
    raw_response = _xoauth2_raw(user, access_token)
    return raw_response.encode()
|
||||
|
||||
|
||||
def make_oauth_state(account_id: str, owner: str) -> str:
    """Create a signed OAuth `state` value for an account/owner pair.

    The token is urlsafe-base64 of ``<json>|<hex hmac-sha256>``, where the
    JSON carries the account id ("a"), the owner ("o") and a fresh random
    nonce ("n"), and the HMAC is keyed with the application secret. The
    signature lets the callback reject state values it did not issue.
    """
    import hashlib as _hl
    import hmac as _hmac
    import secrets as _sec
    from src.secret_storage import _load_or_create_key

    claims = {"a": account_id, "o": owner, "n": _sec.token_hex(16)}
    # Compact separators keep the signed text short and unambiguous.
    payload = json.dumps(claims, separators=(",", ":"))
    mac = _hmac.new(_load_or_create_key(), payload.encode(), _hl.sha256)
    signed = f"{payload}|{mac.hexdigest()}"
    return base64.urlsafe_b64encode(signed.encode()).decode()
|
||||
|
||||
|
||||
def verify_oauth_state(state: str) -> dict | None:
    """Check an OAuth `state` token and return its payload.

    Decodes the urlsafe-base64 token, splits off the trailing signature,
    recomputes the HMAC-SHA256 over the payload with the application key
    and compares the two in constant time. Returns the decoded payload
    ({"a", "o", "n"}) on a match, or None for anything malformed, tampered
    with, or signed with another key.
    """
    import hashlib as _hl
    import hmac as _hmac
    from src.secret_storage import _load_or_create_key

    try:
        token = base64.urlsafe_b64decode(state.encode()).decode()
        # Split on the LAST "|" so a "|" inside the JSON cannot confuse parsing.
        body, signature = token.rsplit("|", 1)
        digest = _hmac.new(_load_or_create_key(), body.encode(), _hl.sha256).hexdigest()
        return json.loads(body) if _hmac.compare_digest(signature, digest) else None
    except Exception:
        return None
|
||||
|
||||
|
||||
def _refresh_google_token(account_id: str) -> str | None:
    """Exchange the stored refresh token for a new access token and persist it.

    Reads GOOGLE_OAUTH_CLIENT_ID / GOOGLE_OAUTH_CLIENT_SECRET from the
    environment, posts a refresh_token grant to Google's token endpoint, and
    stores the encrypted access token plus its expiry (epoch seconds, as a
    string) on the account row.

    Returns the new access token, or None when the client credentials are
    not configured, the account has no usable refresh token, or any step of
    the exchange fails (the failure is logged, never raised).
    """
    import httpx
    from core.database import SessionLocal as _SL, EmailAccount as _EA
    from src.secret_storage import encrypt as _enc, decrypt as _dec

    client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
    client_secret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
    if not client_id or not client_secret:
        return None

    db = _SL()
    try:
        row = db.get(_EA, account_id)
        if not row or not row.oauth_refresh_token:
            return None
        refresh_token = _dec(row.oauth_refresh_token)
        if not refresh_token:
            return None
        resp = httpx.post("https://oauth2.googleapis.com/token", data={
            "client_id": client_id,
            "client_secret": client_secret,
            "refresh_token": refresh_token,
            "grant_type": "refresh_token",
        }, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        access_token = data["access_token"]
        # Coerce to int so the stored expiry is always a plain integer
        # string; a float or string `expires_in` would otherwise produce a
        # value that int() cannot parse when the expiry is read back.
        lifetime = int(data.get("expires_in", 3600))
        row.oauth_access_token = _enc(access_token)
        row.oauth_token_expiry = str(int(time.time()) + lifetime)
        db.commit()
        return access_token
    except Exception as exc:
        # Best-effort by design, but record why it failed so the cause
        # (HTTP error, missing field, decrypt failure, ...) is diagnosable.
        logger.warning(
            "Google token refresh failed for account %s (%s: %s)",
            account_id, type(exc).__name__, exc,
        )
        return None
    finally:
        db.close()
|
||||
|
||||
|
||||
def _get_valid_google_token(account_id: str, cfg: dict) -> str | None:
    """Return a usable Google access token for the account.

    The token cached in *cfg* (stored encrypted under ``oauth_access_token``)
    is reused while its ``oauth_token_expiry`` is more than 60 seconds in the
    future. Otherwise, or when either value is missing or the expiry is not
    an integer, the token is refreshed via `_refresh_google_token`.
    """
    from src.secret_storage import decrypt as _dec

    cached_token = _dec(cfg.get("oauth_access_token") or "")
    expiry_raw = cfg.get("oauth_token_expiry") or ""

    if not (cached_token and expiry_raw):
        return _refresh_google_token(account_id)

    try:
        # 60-second safety margin so the token does not expire mid-request.
        still_valid = int(expiry_raw) - 60 > time.time()
    except (ValueError, TypeError):
        still_valid = False

    if still_valid:
        return cached_token
    return _refresh_google_token(account_id)
|
||||
|
||||
|
||||
def _smtp_security_mode(cfg: dict) -> str:
|
||||
raw = str(cfg.get("smtp_security") or "").strip().lower()
|
||||
if raw in {"ssl", "starttls", "none"}:
|
||||
@@ -54,20 +156,29 @@ def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message
|
||||
port = int(cfg.get("smtp_port") or 465)
|
||||
user = cfg.get("smtp_user") or ""
|
||||
password = cfg.get("smtp_password") or ""
|
||||
|
||||
def _auth_smtp(smtp):
    """Authenticate an open SMTP session with the enclosing cfg's credentials.

    Google OAuth accounts use XOAUTH2 with a valid access token; a
    RuntimeError is raised when no token can be obtained. Other accounts
    log in with user/password when both are set, and otherwise send
    unauthenticated.
    """
    if cfg.get("oauth_provider") != "google":
        if user and password:
            smtp.login(user, password)
        return

    token = _get_valid_google_token(cfg.get("account_id"), cfg)
    if not token:
        raise RuntimeError("Google OAuth token unavailable — reconnect the account")
    smtp.ehlo()
    smtp.auth(
        "XOAUTH2",
        lambda challenge=None: _xoauth2_raw(user, token),
        initial_response_ok=True,
    )
|
||||
|
||||
security = _smtp_security_mode(cfg)
|
||||
|
||||
if security == "ssl":
|
||||
with smtplib.SMTP_SSL(host, port, timeout=timeout) as smtp:
|
||||
if user and password:
|
||||
smtp.login(user, password)
|
||||
_auth_smtp(smtp)
|
||||
smtp.sendmail(from_addr, recipients, message)
|
||||
return
|
||||
|
||||
with smtplib.SMTP(host, port, timeout=timeout) as smtp:
|
||||
if security == "starttls":
|
||||
smtp.starttls()
|
||||
if user and password:
|
||||
smtp.login(user, password)
|
||||
_auth_smtp(smtp)
|
||||
smtp.sendmail(from_addr, recipients, message)
|
||||
|
||||
|
||||
@@ -701,10 +812,16 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
|
||||
"imap_password": _decrypt(row.imap_password or ""),
|
||||
"imap_starttls": bool(row.imap_starttls),
|
||||
"from_address": row.from_address or row.imap_user or "",
|
||||
"oauth_provider": row.oauth_provider or "",
|
||||
"oauth_access_token": row.oauth_access_token or "",
|
||||
"oauth_refresh_token": row.oauth_refresh_token or "",
|
||||
"oauth_token_expiry": row.oauth_token_expiry or "",
|
||||
"display_name": row.display_name or "",
|
||||
}
|
||||
if not (cfg["smtp_host"] and cfg["smtp_user"] and cfg["smtp_password"]):
|
||||
is_oauth = bool(cfg.get("oauth_provider"))
|
||||
if not is_oauth and not (cfg["smtp_host"] and cfg["smtp_user"] and cfg["smtp_password"]):
|
||||
logger.warning(f"SMTP not configured for account {row.name!r}")
|
||||
if not (cfg["imap_host"] and cfg["imap_user"] and cfg["imap_password"]):
|
||||
if not is_oauth and not (cfg["imap_host"] and cfg["imap_user"] and cfg["imap_password"]):
|
||||
logger.warning(f"IMAP not configured for account {row.name!r}")
|
||||
return cfg
|
||||
finally:
|
||||
@@ -825,12 +942,19 @@ def _imap_connect(account_id: str | None = None, owner: str = "",
|
||||
timeout=timeout,
|
||||
)
|
||||
try:
|
||||
conn.login(cfg["imap_user"], cfg["imap_password"])
|
||||
if cfg.get("oauth_provider") == "google":
|
||||
token = _get_valid_google_token(cfg.get("account_id"), cfg)
|
||||
if not token:
|
||||
raise RuntimeError("Google OAuth token unavailable — reconnect the account in Settings → Integrations")
|
||||
conn.authenticate("XOAUTH2", lambda x: _xoauth2_bytes(cfg["imap_user"], token))
|
||||
else:
|
||||
conn.login(cfg["imap_user"], cfg["imap_password"])
|
||||
except Exception:
|
||||
# A failed AUTHENTICATE (e.g. an Office 365 app password on an
|
||||
# MFA-enabled tenant, #3174) otherwise orphans the already-connected
|
||||
# socket; close it before propagating so a misconfigured account
|
||||
# can't leak one descriptor per retry / background poller pass.
|
||||
# MFA-enabled tenant, #3174, or an expired/revoked OAuth token)
|
||||
# otherwise orphans the already-connected socket; close it before
|
||||
# propagating so a misconfigured account can't leak one descriptor
|
||||
# per retry / background poller pass.
|
||||
try:
|
||||
conn.shutdown()
|
||||
except Exception:
|
||||
|
||||
@@ -13,7 +13,9 @@ handlers need. The split is mechanical — no behavior change.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import sqlite3 as _sql3
|
||||
import time
|
||||
import email as email_mod
|
||||
import email.header
|
||||
import email.utils
|
||||
@@ -43,6 +45,7 @@ from routes.email_helpers import (
|
||||
_load_settings, _save_settings, _get_email_config,
|
||||
_send_smtp_message, _smtp_security_mode,
|
||||
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
|
||||
make_oauth_state, verify_oauth_state,
|
||||
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
|
||||
_extract_attachment_text, _list_attachments_from_msg,
|
||||
_extract_attachment_to_disk, _extract_html, _extract_text,
|
||||
@@ -285,7 +288,9 @@ def _group_uid_fetch_records(msg_data) -> list:
|
||||
|
||||
|
||||
def _smtp_ready(cfg: dict) -> bool:
|
||||
return bool(cfg.get("smtp_host") and cfg.get("smtp_user") and cfg.get("smtp_password"))
|
||||
if not cfg.get("smtp_host") or not cfg.get("smtp_user"):
|
||||
return False
|
||||
return bool(cfg.get("smtp_password") or cfg.get("oauth_provider"))
|
||||
|
||||
|
||||
def _resolve_send_config(account_id: str | None = None, owner: str = "") -> dict:
|
||||
@@ -1087,14 +1092,22 @@ def setup_email_routes():
|
||||
return {"contacts": [], "error": "Mail operation failed"}
|
||||
|
||||
@router.get("/search")
|
||||
async def search_emails(
|
||||
# Sync def: the body is blocking IMAP I/O with no awaits. As `async def` it ran
|
||||
# directly on the event loop and stalled the whole app during a search; as a sync
|
||||
# def FastAPI runs it in a threadpool, keeping the loop responsive.
|
||||
def search_emails(
|
||||
q: str = Query(""),
|
||||
folder: str = Query("INBOX"),
|
||||
limit: int = Query(50),
|
||||
account_id: str | None = Query(None),
|
||||
owner: str = Depends(require_owner),
|
||||
):
|
||||
"""Search emails server-side via IMAP SEARCH. Matches subject, from, or body text."""
|
||||
"""Search emails server-side via IMAP SEARCH. Matches subject, from, or body text.
|
||||
|
||||
When the caller asks for INBOX and the account has an "All Mail"
|
||||
folder (Gmail does), we transparently swap to All Mail so the
|
||||
search surfaces archived / labelled emails too. Plain IMAP
|
||||
accounts fall back to whatever folder the caller specified."""
|
||||
if not q or len(q) < 2:
|
||||
return {"emails": [], "total": 0, "query": q}
|
||||
# CRLF in q would terminate the IMAP command early — reject defensively.
|
||||
@@ -1102,7 +1115,27 @@ def setup_email_routes():
|
||||
raise HTTPException(400, "Invalid query")
|
||||
try:
|
||||
with _imap(account_id, owner=owner) as conn:
|
||||
conn.select(_q(folder), readonly=True)
|
||||
# If the user asked for INBOX, try to upgrade to All Mail —
|
||||
# one folder == every email on Gmail-class servers.
|
||||
effective_folder = folder
|
||||
if (folder or "").upper() == "INBOX":
|
||||
try:
|
||||
status, folder_lines = conn.list()
|
||||
if status == "OK" and folder_lines:
|
||||
for raw in folder_lines:
|
||||
if isinstance(raw, bytes):
|
||||
raw = raw.decode("utf-8", errors="replace")
|
||||
m = re.match(r"\((?P<flags>[^)]*)\)\s+\"[^\"]*\"\s+(?P<name>.+)", raw)
|
||||
if not m:
|
||||
continue
|
||||
flags = (m.group("flags") or "").lower()
|
||||
name = m.group("name").strip().strip('"')
|
||||
if "\\all" in flags or "all mail" in name.lower():
|
||||
effective_folder = name
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
conn.select(_q(effective_folder), readonly=True)
|
||||
|
||||
# Escape backslash and quote for the IMAP-SEARCH quoted-string.
|
||||
q_escaped = q.replace('\\', '\\\\').replace('"', '\\"')
|
||||
@@ -1110,7 +1143,7 @@ def setup_email_routes():
|
||||
|
||||
status, data = _imap_uid_search(conn, search_cmd)
|
||||
if status != "OK" or not data[0]:
|
||||
return {"emails": [], "total": 0, "query": q}
|
||||
return {"emails": [], "total": 0, "query": q, "folder": effective_folder}
|
||||
|
||||
uid_list = data[0].split()
|
||||
total = len(uid_list)
|
||||
@@ -1175,6 +1208,13 @@ def setup_email_routes():
|
||||
"is_flagged": "\\Flagged" in flags,
|
||||
"flags": flags,
|
||||
"has_attachments": has_attachments,
|
||||
# Stamp the folder so the frontend opens each
|
||||
# email from the folder it actually lives in
|
||||
# (the search may have run against All Mail
|
||||
# even though the caller asked for INBOX),
|
||||
# otherwise clicks open whatever happens to
|
||||
# have the same UID in INBOX → wrong email.
|
||||
"folder": effective_folder,
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning(f"Error parsing search result {uid}: {e}")
|
||||
@@ -1721,6 +1761,22 @@ def setup_email_routes():
|
||||
logger.error(f"Failed to mark unread {uid}: {e}")
|
||||
return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.post("/flag/{uid}")
async def flag_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None),
                     on: bool = Query(True), owner: str = Depends(require_owner)):
    """Toggle the \\Flagged flag (a.k.a. favorite / star) on an email.

    Pass `on=true` to favorite, `on=false` to unfavorite.

    Returns ``{"success": True, "flagged": <bool>}`` on success, or
    ``{"success": False, "error": ...}`` when the message is not found or
    the IMAP operation fails.
    """
    def _store_flag() -> bool:
        # Blocking IMAP round-trips: kept in a plain function so they can run
        # in a worker thread instead of on the event loop.
        with _imap(account_id, owner=owner) as conn:
            conn.select(_q(folder))
            return bool(_store_email_flag(conn, uid, "\\Flagged", add=bool(on)))

    try:
        # Running the IMAP work directly in this coroutine would stall every
        # other request for the duration of the server round-trip.
        if not await asyncio.to_thread(_store_flag):
            return {"success": False, "error": "Email not found"}
        _invalidate_list_cache(account_id, folder)
        return {"success": True, "flagged": bool(on)}
    except Exception as e:
        logger.error(f"Failed to flag {uid}: {e}")
        return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.post("/mark-read/{uid}")
|
||||
async def mark_read(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
|
||||
"""Mark an email as read (set \\Seen flag)."""
|
||||
@@ -1736,7 +1792,9 @@ def setup_email_routes():
|
||||
return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.post("/archive/{uid}")
|
||||
async def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
|
||||
# Sync def: blocking IMAP I/O with no awaits — see search_emails above. Runs in a
|
||||
# threadpool instead of blocking the event loop.
|
||||
def archive_email(uid: str, folder: str = Query("INBOX"), account_id: str | None = Query(None), owner: str = Depends(require_owner)):
|
||||
"""Move email to Archive folder."""
|
||||
try:
|
||||
with _imap(account_id, owner=owner) as conn:
|
||||
@@ -1968,7 +2026,7 @@ def setup_email_routes():
|
||||
outer = MIMEMultipart("alternative")
|
||||
body_container = outer
|
||||
|
||||
outer["From"] = cfg["from_address"]
|
||||
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
|
||||
outer["To"] = to
|
||||
if cc:
|
||||
outer["Cc"] = cc
|
||||
@@ -2099,6 +2157,79 @@ def setup_email_routes():
|
||||
logger.error(f"cancel_scheduled {sid!r} failed: {e}")
|
||||
return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
# ── Agent send-confirm: list/approve/cancel ──────────────────────────
|
||||
# When `agent_email_confirm` is on, the MCP send_email tool drops the
|
||||
# composed email into scheduled_emails with status='agent_draft' (a
|
||||
# far-future send_at so the poller never picks it up). These endpoints
|
||||
# let the chat UI surface them for the user and either approve (flip
|
||||
# to status='pending' with send_at=now so the poller delivers it) or
|
||||
# cancel (status='cancelled').
|
||||
@router.get("/pending")
async def list_pending_agent_drafts(owner: str = Depends(require_owner)):
    """List agent-staged drafts (status ``agent_draft``) awaiting approval.

    Returns ``{"pending": [...]}`` with one dict per row, newest first, or
    ``{"pending": [], "error": ...}`` when the query fails.
    """
    import sqlite3
    try:
        conn = sqlite3.connect(SCHEDULED_DB)
        try:
            conn.row_factory = sqlite3.Row
            # The MCP server can't easily set owner, so it stores '' — fall
            # back to those rows in addition to the caller's owner.
            rows = conn.execute(
                """SELECT id, to_addr, subject, body, created_at, account_id
                   FROM scheduled_emails
                   WHERE status = 'agent_draft' AND (owner = ? OR owner = '')
                   ORDER BY created_at DESC""",
                (owner or "",),
            ).fetchall()
        finally:
            # Close even when the query raises, so a failing request cannot
            # leak the connection.
            conn.close()
        return {"pending": [dict(r) for r in rows]}
    except Exception as e:
        logger.error(f"list_pending_agent_drafts failed: {e}")
        return {"pending": [], "error": "Mail operation failed"}
|
||||
|
||||
@router.post("/pending/{sid}/approve")
async def approve_agent_draft(sid: str, owner: str = Depends(require_owner)):
    """Approve a draft staged by the agent: flip status → pending and
    set send_at to the current UTC time so the scheduled-send poller picks
    it up immediately.

    Returns ``{"success": True}`` when a row was updated, otherwise
    ``{"success": False, "error": ...}``.
    """
    import sqlite3
    try:
        conn = sqlite3.connect(SCHEDULED_DB)
        try:
            cur = conn.execute(
                """UPDATE scheduled_emails
                   SET status = 'pending', send_at = ?
                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (datetime.utcnow().isoformat(), sid, owner or ""),
            )
            conn.commit()
            affected = cur.rowcount
        finally:
            # Close even when the UPDATE or commit raises, so a failing
            # request cannot leak the connection.
            conn.close()
        if not affected:
            return {"success": False, "error": "Draft not found or already handled"}
        return {"success": True}
    except Exception as e:
        logger.error(f"approve_agent_draft {sid!r} failed: {e}")
        return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.delete("/pending/{sid}")
async def cancel_agent_draft(sid: str, owner: str = Depends(require_owner)):
    """Discard a draft the agent staged for approval (status → cancelled).

    Returns ``{"success": True}`` when a row was updated, otherwise
    ``{"success": False, "error": ...}``.
    """
    import sqlite3
    try:
        conn = sqlite3.connect(SCHEDULED_DB)
        try:
            cur = conn.execute(
                """UPDATE scheduled_emails SET status = 'cancelled'
                   WHERE id = ? AND status = 'agent_draft' AND (owner = ? OR owner = '')""",
                (sid, owner or ""),
            )
            conn.commit()
            affected = cur.rowcount
        finally:
            # Close even when the UPDATE or commit raises, so a failing
            # request cannot leak the connection.
            conn.close()
        if not affected:
            return {"success": False, "error": "Draft not found or already handled"}
        return {"success": True}
    except Exception as e:
        logger.error(f"cancel_agent_draft {sid!r} failed: {e}")
        return {"success": False, "error": "Mail operation failed"}
|
||||
|
||||
@router.get("/resolve-contact")
|
||||
async def resolve_contact(name: str = Query(..., description="Name to search for"), owner: str = Depends(require_owner)):
|
||||
"""Search Sent folder for a contact by name. Returns matching email addresses."""
|
||||
@@ -2159,6 +2290,7 @@ def setup_email_routes():
|
||||
try:
|
||||
cfg = _resolve_send_config(req.account_id, owner=owner)
|
||||
except Exception as e:
|
||||
logger.warning(f"No SMTP-capable account resolved: {e}")
|
||||
return {"success": False, "error": str(e) or "No SMTP-capable email account configured"}
|
||||
|
||||
# Use 'mixed' if we have attachments, 'alternative' otherwise
|
||||
@@ -2171,7 +2303,7 @@ def setup_email_routes():
|
||||
outer = MIMEMultipart("alternative")
|
||||
body_container = outer
|
||||
|
||||
outer["From"] = cfg["from_address"]
|
||||
outer["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
|
||||
outer["To"] = req.to
|
||||
if req.cc:
|
||||
outer["Cc"] = req.cc
|
||||
@@ -2222,6 +2354,10 @@ def setup_email_routes():
|
||||
|
||||
_account_id = cfg.get("account_id") or req.account_id # capture for the IMAP append in the closure
|
||||
_in_reply_to = (req.in_reply_to or "").strip()
|
||||
_oauth_provider = cfg.get("oauth_provider") or ""
|
||||
_oauth_access_token = cfg.get("oauth_access_token") or ""
|
||||
_oauth_refresh_token = cfg.get("oauth_refresh_token") or ""
|
||||
_oauth_token_expiry = cfg.get("oauth_token_expiry") or ""
|
||||
|
||||
def _deliver():
|
||||
try:
|
||||
@@ -2232,6 +2368,11 @@ def setup_email_routes():
|
||||
"smtp_security": _smtp_security,
|
||||
"smtp_user": _smtp_user,
|
||||
"smtp_password": _smtp_pw,
|
||||
"account_id": _account_id,
|
||||
"oauth_provider": _oauth_provider,
|
||||
"oauth_access_token": _oauth_access_token,
|
||||
"oauth_refresh_token": _oauth_refresh_token,
|
||||
"oauth_token_expiry": _oauth_token_expiry,
|
||||
},
|
||||
_from,
|
||||
_recipients,
|
||||
@@ -2344,7 +2485,7 @@ def setup_email_routes():
|
||||
msg.attach(MIMEText(_draft_html, "html", "utf-8"))
|
||||
else:
|
||||
msg = MIMEText(req.body, "plain", "utf-8")
|
||||
msg["From"] = cfg["from_address"]
|
||||
msg["From"] = email.utils.formataddr((cfg.get("display_name") or "", cfg["from_address"]))
|
||||
msg["To"] = req.to
|
||||
if req.cc:
|
||||
msg["Cc"] = req.cc
|
||||
@@ -2612,11 +2753,15 @@ def setup_email_routes():
|
||||
source_uid = (data.get("uid") or "").strip()
|
||||
source_folder = (data.get("folder") or "INBOX").strip()
|
||||
fast_reply = bool(data.get("fast", False))
|
||||
user_hint = (data.get("user_hint") or "").strip()
|
||||
|
||||
if not original_body:
|
||||
return {"success": False, "error": "No email body provided"}
|
||||
|
||||
if message_id:
|
||||
# Skip cache lookup when the caller supplied a user_hint — the
|
||||
# cached generic reply doesn't reflect the instructions and
|
||||
# would silently override them.
|
||||
if message_id and not user_hint:
|
||||
try:
|
||||
_c = _sql3.connect(SCHEDULED_DB)
|
||||
owner_clause, owner_params = _email_cache_owner_clause(owner)
|
||||
@@ -2756,8 +2901,13 @@ def setup_email_routes():
|
||||
user_msg = (
|
||||
f"Recipient: {to}\nSubject: {subject}\n\n"
|
||||
f"Original email and any current draft:\n{original_body[:6000]}\n\n"
|
||||
f"Draft a reply. Return only the reply body text."
|
||||
)
|
||||
if user_hint:
|
||||
user_msg += (
|
||||
f"User's instructions for THIS reply (follow these — they override "
|
||||
f"defaults like length/tone):\n{user_hint[:2000]}\n\n"
|
||||
)
|
||||
user_msg += "Draft a reply. Return only the reply body text."
|
||||
|
||||
# Build a candidate chain so a stale session-stored API key
|
||||
# (the most common cause of "authentication failed" here)
|
||||
@@ -2987,6 +3137,8 @@ def setup_email_routes():
|
||||
"from_address": r.from_address or "",
|
||||
"has_imap_password": bool(r.imap_password),
|
||||
"has_smtp_password": bool(r.smtp_password),
|
||||
"oauth_provider": r.oauth_provider or "",
|
||||
"display_name": r.display_name or "",
|
||||
})
|
||||
return {"accounts": out}
|
||||
finally:
|
||||
@@ -3019,6 +3171,7 @@ def setup_email_routes():
|
||||
smtp_user=(data.get("smtp_user") or "").strip(),
|
||||
smtp_password=_enc(data.get("smtp_password") or ""),
|
||||
from_address=(data.get("from_address") or "").strip(),
|
||||
display_name=(data.get("display_name") or "").strip(),
|
||||
# SECURITY: stamp the creator so all subsequent reads / mutations
|
||||
# can filter by user. Without this every new account leaks to
|
||||
# every other user.
|
||||
@@ -3053,7 +3206,7 @@ def setup_email_routes():
|
||||
if not row:
|
||||
return {"ok": False, "error": "Account not found"}
|
||||
# Simple fields
|
||||
for key in ("name", "imap_host", "imap_user", "smtp_host", "smtp_user", "from_address"):
|
||||
for key in ("name", "imap_host", "imap_user", "smtp_host", "smtp_user", "from_address", "display_name"):
|
||||
if key in data:
|
||||
setattr(row, key, (data[key] or "").strip())
|
||||
for key in ("imap_port", "smtp_port"):
|
||||
@@ -3242,4 +3395,123 @@ def setup_email_routes():
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# ── Google OAuth2 routes ──
|
||||
|
||||
@router.get("/oauth/google/authorize")
async def google_oauth_authorize(account_id: str = Query(...), request: Request = None, owner: str = Depends(require_user)):
    """Redirect the browser to Google's OAuth consent screen for an account.

    Checks that *owner* owns *account_id*, then builds the authorization URL
    with a signed ``state`` token. The redirect URI comes from
    ``GOOGLE_OAUTH_REDIRECT_URI`` or, when that is unset, from the request's
    Host header. Raises HTTP 400 when ``GOOGLE_OAUTH_CLIENT_ID`` is not set.
    """
    import urllib.parse

    _assert_owns_account(account_id, owner)

    client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
    if not client_id:
        raise HTTPException(400, "GOOGLE_OAUTH_CLIENT_ID not set — add it to .env")

    redirect_uri = os.environ.get("GOOGLE_OAUTH_REDIRECT_URI")
    if not redirect_uri:
        host = request.headers.get('host', 'localhost:7000')
        redirect_uri = f"http://{host}/api/email/oauth/google/callback"

    signed_state = make_oauth_state(account_id, owner)
    query = {
        "client_id": client_id,
        "redirect_uri": redirect_uri,
        "response_type": "code",
        "scope": "https://mail.google.com/ email",
        "access_type": "offline",
        "prompt": "consent",
        "state": signed_state,
    }
    params = urllib.parse.urlencode(query)

    from fastapi.responses import RedirectResponse as _RR
    return _RR(f"https://accounts.google.com/o/oauth2/v2/auth?{params}")
|
||||
|
||||
@router.get("/oauth/google/callback")
async def google_oauth_callback(
    code: str = Query(None),
    state: str = Query(None),
    error: str = Query(None),
    request: Request = None,
):
    """Handle Google's OAuth redirect: exchange the code and store the tokens.

    Verifies the signed ``state``, exchanges ``code`` for tokens, looks up
    the Google profile to pre-fill empty account fields, and writes the
    encrypted tokens to the EmailAccount row named in the state.

    Always answers with a redirect to the integrations page, carrying either
    ``email_oauth_success=1`` or an ``email_oauth_error=<reason>`` marker.
    """
    from fastapi.responses import RedirectResponse as _RR
    if error:
        return _RR("/?section=integrations&email_oauth_error=google_error")
    if not code or not state:
        return _RR("/?section=integrations&email_oauth_error=missing_code")
    state_data = verify_oauth_state(state)
    if not state_data:
        return _RR("/?section=integrations&email_oauth_error=invalid_state")
    account_id = state_data.get("a", "")
    owner = state_data.get("o", "")
    client_id = os.environ.get("GOOGLE_OAUTH_CLIENT_ID", "")
    client_secret = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
    redirect_uri = (
        os.environ.get("GOOGLE_OAUTH_REDIRECT_URI")
        or f"http://{request.headers.get('host', 'localhost:7000')}/api/email/oauth/google/callback"
    )
    import httpx as _httpx
    try:
        resp = _httpx.post("https://oauth2.googleapis.com/token", data={
            "code": code,
            "client_id": client_id,
            "client_secret": client_secret,
            "redirect_uri": redirect_uri,
            "grant_type": "authorization_code",
        }, timeout=10)
        resp.raise_for_status()
        data = resp.json()
    except Exception:
        logger.warning("Google token exchange failed")
        return _RR("/?section=integrations&email_oauth_error=token_exchange_failed")
    access_token = data.get("access_token", "")
    if not access_token:
        # A 2xx response without an access token is still a failed exchange.
        # Without this guard an empty token would be stored and the account
        # flagged as Google OAuth while reporting success to the user.
        logger.warning("Google token exchange returned no access token")
        return _RR("/?section=integrations&email_oauth_error=token_exchange_failed")
    refresh_token = data.get("refresh_token", "")
    expiry = str(int(time.time()) + data.get("expires_in", 3600))
    # Fetch the email address from userinfo so we can auto-fill imap_user.
    email_addr = ""
    display_name = ""
    try:
        ui = _httpx.get("https://www.googleapis.com/oauth2/v1/userinfo",
                        headers={"Authorization": f"Bearer {access_token}"}, timeout=10)
        if ui.is_success:
            ui_data = ui.json()
            email_addr = ui_data.get("email", "")
            display_name = ui_data.get("name", "")
    except Exception:
        # Best-effort: the profile is only used to pre-fill empty fields.
        pass
    from core.database import SessionLocal, EmailAccount
    from src.secret_storage import encrypt as _enc
    db = SessionLocal()
    try:
        row = db.query(EmailAccount).filter(EmailAccount.id == account_id).first()
        if not row:
            return _RR("/?section=integrations&email_oauth_error=account_not_found")
        # SECURITY: verify the account belongs to the initiating user.
        if owner and row.owner and row.owner != owner:
            logger.warning("OAuth callback owner mismatch — rejecting token write")
            return _RR("/?section=integrations&email_oauth_error=ownership_error")
        row.oauth_provider = "google"
        row.oauth_access_token = _enc(access_token)
        # Keep any previously stored refresh token when this response has none.
        if refresh_token:
            row.oauth_refresh_token = _enc(refresh_token)
        row.oauth_token_expiry = expiry
        # Auto-fill Google IMAP/SMTP settings if not already configured.
        if not row.imap_host:
            row.imap_host = "imap.gmail.com"
            row.imap_port = 993
            row.imap_starttls = False
        if not row.smtp_host:
            row.smtp_host = "smtp.gmail.com"
            row.smtp_port = 587
        if email_addr:
            if not row.imap_user:
                row.imap_user = email_addr
            if not row.smtp_user:
                row.smtp_user = email_addr
            if not row.from_address:
                row.from_address = email_addr
            if not row.name or row.name == row.id:
                row.name = email_addr
        if display_name and not row.display_name:
            row.display_name = display_name
        db.commit()
    finally:
        db.close()
    return _RR("/?section=integrations&email_oauth_success=1")
|
||||
|
||||
return router
|
||||
|
||||
@@ -19,6 +19,7 @@ from src.upload_limits import (
|
||||
GALLERY_TRANSFORM_UPLOAD_MAX_BYTES,
|
||||
)
|
||||
from src.constants import GENERATED_IMAGES_DIR
|
||||
from src.optional_deps import patch_realesrgan_torchvision_compat
|
||||
|
||||
from routes.gallery_helpers import (
|
||||
GalleryPatch, _extract_exif, _image_to_dict, _owner_filter, _human_size,
|
||||
@@ -66,6 +67,14 @@ def _gallery_image_path(filename: str) -> Path:
|
||||
raise HTTPException(400, "Unsafe gallery filename")
|
||||
if safe_name != original:
|
||||
raise HTTPException(400, "Unsafe gallery filename")
|
||||
if not path.exists():
|
||||
cwd_root = (Path.cwd() / "data" / "generated_images").resolve()
|
||||
cwd_path = (cwd_root / safe_name).resolve()
|
||||
try:
|
||||
if os.path.commonpath([str(cwd_root), str(cwd_path)]) == str(cwd_root) and cwd_path.exists():
|
||||
return cwd_path
|
||||
except Exception:
|
||||
pass
|
||||
return path
|
||||
|
||||
|
||||
@@ -108,6 +117,32 @@ def _visible_image_endpoint_for_base(db, base: str, owner: str | None):
|
||||
return fallback
|
||||
|
||||
|
||||
async def _fetch_result_image_b64(url: str) -> Optional[str]:
    """Download an image URL from an upstream response and base64-encode it.

    The URL is taken from the diffusion/OpenAI server's response body rather
    than from our own configuration, so a malicious or compromised endpoint
    could point it at an internal or cloud-metadata address. It is therefore
    checked with ``check_outbound_url`` before any request is made; private
    addresses are blocked when ``IMAGE_BLOCK_PRIVATE_IPS`` is ``true``.

    Returns the base64 text of the response body on HTTP 200 and None for
    any other status. Raises HTTP 502 when the URL fails the safety check.
    """
    import base64
    import httpx
    from src.url_safety import check_outbound_url

    block_private = os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true"
    ok, reason = check_outbound_url(url, block_private=block_private)
    if not ok:
        raise HTTPException(502, f"Upstream returned an unsafe image URL: {reason}")

    async with httpx.AsyncClient(timeout=60) as client:
        response = await client.get(url)

    if response.status_code != 200:
        return None
    return base64.b64encode(response.content).decode()
|
||||
|
||||
|
||||
def setup_gallery_routes() -> APIRouter:
|
||||
router = APIRouter(tags=["gallery"])
|
||||
|
||||
@@ -197,8 +232,6 @@ def setup_gallery_routes() -> APIRouter:
|
||||
@router.post("/api/gallery/{image_id}/replace")
|
||||
async def gallery_replace(request: Request, image_id: str):
|
||||
"""Replace an existing gallery image file with a new one."""
|
||||
from pathlib import Path
|
||||
|
||||
user = get_current_user(request)
|
||||
db = SessionLocal()
|
||||
try:
|
||||
@@ -214,9 +247,8 @@ def setup_gallery_routes() -> APIRouter:
|
||||
raise HTTPException(400, "No image provided")
|
||||
|
||||
content = await read_upload_limited(file, GALLERY_UPLOAD_MAX_BYTES, "Gallery replacement")
|
||||
img_dir = Path(GENERATED_IMAGES_DIR)
|
||||
img_dir.mkdir(parents=True, exist_ok=True)
|
||||
img_path = img_dir / _sanitize_gallery_filename(img.filename)
|
||||
GALLERY_IMAGE_DIR.mkdir(parents=True, exist_ok=True)
|
||||
img_path = _gallery_image_path(img.filename)
|
||||
img_path.write_bytes(content)
|
||||
|
||||
# Refresh dimensions in case the editor resized the canvas.
|
||||
@@ -904,15 +936,23 @@ def setup_gallery_routes() -> APIRouter:
|
||||
raise HTTPException(404, "Image not found")
|
||||
|
||||
img_filename = img.filename
|
||||
# Remove the file from disk
|
||||
img_path = _gallery_image_path(img_filename)
|
||||
if img_path.exists():
|
||||
img_path.unlink()
|
||||
|
||||
# Soft-delete the record
|
||||
# Soft-delete the record first; the DB is the source of truth.
|
||||
img.is_active = False
|
||||
db.commit()
|
||||
|
||||
# Only after the soft-delete commit succeeds do we remove the file.
|
||||
# If the file were deleted first and the commit then failed/rolled
|
||||
# back, the still-active record would point at a missing file.
|
||||
# Best-effort so a missing or locked file can't 500 a delete that
|
||||
# already succeeded logically. Uses the path-confined resolver so a
|
||||
# malformed stored filename can't escape generated_images.
|
||||
try:
|
||||
img_path = _gallery_image_path(img_filename)
|
||||
if img_path.exists():
|
||||
img_path.unlink()
|
||||
except Exception as e:
|
||||
logger.warning(f"Could not remove gallery image file for {img_filename}: {e}")
|
||||
|
||||
# Strip stale chat-history references so the image bubble
|
||||
# (and its prompt caption) doesn't come back after a server
|
||||
# reboot replays the session. We remove the matching tool
|
||||
@@ -1142,10 +1182,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
if item.get("b64_json"):
|
||||
raw_b64 = item["b64_json"]
|
||||
elif item.get("url"):
|
||||
async with httpx.AsyncClient(timeout=60) as c2:
|
||||
img_r = await c2.get(item["url"])
|
||||
if img_r.status_code == 200:
|
||||
raw_b64 = base64.b64encode(img_r.content).decode()
|
||||
raw_b64 = await _fetch_result_image_b64(item["url"])
|
||||
if not raw_b64:
|
||||
raise HTTPException(502, "OpenAI returned no image")
|
||||
|
||||
@@ -1206,7 +1243,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
original and regenerates `strength` fraction. With strength ~0.4
|
||||
you get edge blending + lighting unification while keeping the
|
||||
composition recognisable."""
|
||||
import httpx, base64 as _b64
|
||||
import httpx
|
||||
user = require_privilege(request, "can_generate_images")
|
||||
body = await request.json()
|
||||
|
||||
@@ -1382,10 +1419,9 @@ def setup_gallery_routes() -> APIRouter:
|
||||
if item.get("b64_json"):
|
||||
return {"image": item["b64_json"]}
|
||||
if item.get("url"):
|
||||
async with httpx.AsyncClient(timeout=60) as c2:
|
||||
ir = await c2.get(item["url"])
|
||||
if ir.status_code == 200:
|
||||
return {"image": _b64.b64encode(ir.content).decode()}
|
||||
img_b64 = await _fetch_result_image_b64(item["url"])
|
||||
if img_b64:
|
||||
return {"image": img_b64}
|
||||
last_err = f"{path}: server returned no image"
|
||||
except httpx.ConnectError as e:
|
||||
raise HTTPException(502, f"Can't reach diffusion server at {base}: {e}")
|
||||
@@ -1445,6 +1481,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
img_bytes = base64.b64decode(image_b64)
|
||||
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
||||
try:
|
||||
patch_realesrgan_torchvision_compat()
|
||||
from realesrgan import RealESRGANer
|
||||
except ImportError:
|
||||
return {"error": "realesrgan not installed. Install it from Cookbook → Dependencies (search 'realesrgan')."}
|
||||
@@ -1494,6 +1531,7 @@ def setup_gallery_routes() -> APIRouter:
|
||||
img_bytes = base64.b64decode(image_b64)
|
||||
src = Image.open(io.BytesIO(img_bytes)).convert("RGB")
|
||||
try:
|
||||
patch_realesrgan_torchvision_compat()
|
||||
from basicsr.archs.rrdbnet_arch import RRDBNet
|
||||
from realesrgan import RealESRGANer
|
||||
except ImportError:
|
||||
|
||||
@@ -119,7 +119,7 @@ def setup_hwfit_routes():
|
||||
return detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
|
||||
|
||||
@router.get("/models")
|
||||
def get_models(use_case: str = "", sort: str = "score", limit: int = 50, search: str = "", host: str = "", quant: str = "", ctx: str = "", gpu_count: str = "", gpu_group: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False, fit_only: bool = False):
|
||||
def get_models(use_case: str = "", sort: str = "newest", limit: int = 50, search: str = "", host: str = "", quant: str = "", ctx: str = "", gpu_count: str = "", gpu_group: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False, fit_only: bool = False):
|
||||
"""Rank LLM models against detected hardware and return scored results.
|
||||
gpu_count: override GPU count (0 = CPU only, 1-N = simulate N GPUs of the
|
||||
active group). gpu_group: index into system.gpu_groups (the homogeneous
|
||||
|
||||
@@ -108,6 +108,12 @@ def _load_disabled_map():
|
||||
db.close()
|
||||
|
||||
|
||||
def _mcp_oauth_redirect_uri() -> str:
    """Return the callback URL shared by the legacy Google and generic MCP OAuth flows.

    The value is read from ``src.mcp_oauth.REDIRECT_URI``; the module is
    imported at call time rather than at module import time.
    """
    import src.mcp_oauth as _mcp_oauth

    return _mcp_oauth.REDIRECT_URI
|
||||
|
||||
|
||||
def setup_mcp_routes(mcp_manager: McpManager):
|
||||
"""Setup MCP routes with the provided manager."""
|
||||
|
||||
@@ -445,9 +451,9 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
client_id = keys["client_id"]
|
||||
scopes = oauth_cfg.get("scopes", [])
|
||||
|
||||
# For Desktop App creds, redirect to localhost — the user will
|
||||
# For Desktop App creds, default to localhost — the user will
|
||||
# paste the resulting URL back if they're on a different device.
|
||||
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
|
||||
redirect_uri = _mcp_oauth_redirect_uri()
|
||||
|
||||
params = {
|
||||
"client_id": client_id,
|
||||
@@ -469,7 +475,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
return RedirectResponse(auth_url)
|
||||
else:
|
||||
# Remote device — show paste-back page
|
||||
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host))
|
||||
return HTMLResponse(_oauth_authorize_page(auth_url, server_id, host, redirect_uri))
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
@@ -536,7 +542,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
client_id = keys["client_id"]
|
||||
client_secret = keys["client_secret"]
|
||||
|
||||
redirect_uri = "http://localhost:7000/api/mcp/oauth/callback"
|
||||
redirect_uri = _mcp_oauth_redirect_uri()
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
resp = await client.post(
|
||||
@@ -603,13 +609,19 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
return router
|
||||
|
||||
|
||||
def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
|
||||
def _oauth_authorize_page(
|
||||
auth_url: str,
|
||||
server_id: str,
|
||||
host: str,
|
||||
redirect_uri: str = "http://localhost:7000/api/mcp/oauth/callback",
|
||||
) -> str:
|
||||
"""Page with Google sign-in link and URL paste-back form for remote access."""
|
||||
# Escape values interpolated into the page: `host` comes from the request
|
||||
# Host header and `server_id` from the OAuth state — neither is trusted.
|
||||
auth_url = html.escape(auth_url, quote=True)
|
||||
server_id = html.escape(server_id, quote=True)
|
||||
host = html.escape(host, quote=True)
|
||||
redirect_uri = html.escape(redirect_uri, quote=True)
|
||||
return f"""<!DOCTYPE html>
|
||||
<html><head>
|
||||
<meta charset="UTF-8"><title>Authorize — Odysseus</title>
|
||||
@@ -654,7 +666,7 @@ def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
|
||||
<div class="divider"></div>
|
||||
<form method="POST" action="http://{host}/api/mcp/oauth/exchange/{server_id}">
|
||||
<p>Paste the URL from your browser after signing in:</p>
|
||||
<input type="text" name="callback_url" placeholder="http://localhost:7000/api/mcp/oauth/callback?code=..." required>
|
||||
<input type="text" name="callback_url" placeholder="{redirect_uri}?code=..." required>
|
||||
<br><button type="submit">Connect</button>
|
||||
</form>
|
||||
</div></body></html>"""
|
||||
|
||||
@@ -29,6 +29,7 @@ from src.llm_core import llm_call_async
|
||||
from services.memory.memory_extractor import audit_memories
|
||||
from src.auth_helpers import get_current_user, require_user
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.task_endpoint import resolve_task_endpoint
|
||||
from src.upload_limits import read_upload_limited, MEMORY_IMPORT_MAX_BYTES
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -240,14 +241,18 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
}
|
||||
messages = [system_msg] + sess.get_context_messages()
|
||||
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||
)
|
||||
|
||||
try:
|
||||
suggestion_text = await llm_call_async(
|
||||
sess.endpoint_url,
|
||||
sess.model,
|
||||
t_url,
|
||||
t_model,
|
||||
messages,
|
||||
temperature=0.2,
|
||||
max_tokens=500,
|
||||
headers=sess.headers,
|
||||
headers=t_headers,
|
||||
)
|
||||
try:
|
||||
suggestions = json.loads(suggestion_text)
|
||||
@@ -278,42 +283,50 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
endpoint_url = model = None
|
||||
headers = {}
|
||||
|
||||
# Try default model from settings first
|
||||
settings = _load_settings()
|
||||
ep_id = settings.get("default_endpoint_id", "")
|
||||
default_model = settings.get("default_model", "")
|
||||
if ep_id:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ep = db.query(ModelEndpoint).filter(
|
||||
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
|
||||
).first()
|
||||
if ep:
|
||||
base = _normalize_base(ep.base_url)
|
||||
endpoint_url = build_chat_url(base)
|
||||
model = default_model
|
||||
if not model and ep.models:
|
||||
try:
|
||||
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
|
||||
if models:
|
||||
model = models[0]
|
||||
except Exception:
|
||||
pass
|
||||
if ep.api_key:
|
||||
headers = {"Authorization": f"Bearer {ep.api_key}"}
|
||||
finally:
|
||||
db.close()
|
||||
# Try utility model from settings first — memory audit is a background
|
||||
# task and should prefer the lighter utility model over the main chat model.
|
||||
from src.task_endpoint import resolve_task_endpoint
|
||||
user = _owner(request)
|
||||
t_url, t_model, t_headers = resolve_task_endpoint(owner=user)
|
||||
if t_url and t_model:
|
||||
endpoint_url, model, headers = t_url, t_model, t_headers
|
||||
else:
|
||||
# Fall back to default model if no task/utility model configured
|
||||
settings = _load_settings()
|
||||
ep_id = settings.get("default_endpoint_id", "")
|
||||
default_model = settings.get("default_model", "")
|
||||
if ep_id:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
ep = db.query(ModelEndpoint).filter(
|
||||
ModelEndpoint.id == ep_id, ModelEndpoint.is_enabled == True
|
||||
).first()
|
||||
if ep:
|
||||
base = _normalize_base(ep.base_url)
|
||||
endpoint_url = build_chat_url(base)
|
||||
model = default_model
|
||||
if not model and ep.models:
|
||||
try:
|
||||
models = _json.loads(ep.models) if isinstance(ep.models, str) else ep.models
|
||||
if models:
|
||||
model = models[0]
|
||||
except Exception:
|
||||
pass
|
||||
if ep.api_key:
|
||||
headers = {"Authorization": f"Bearer {ep.api_key}"}
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
# Fall back to session model if no default configured
|
||||
if not endpoint_url and session:
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
except KeyError:
|
||||
pass
|
||||
# Fall back to session model if no default configured
|
||||
if not endpoint_url and session:
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
if not endpoint_url or not model:
|
||||
raise HTTPException(400, "No default model configured — set one in Settings")
|
||||
@@ -360,13 +373,14 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
|
||||
try:
|
||||
sess = session_manager.get_session(session)
|
||||
_assert_session_owner(sess, _owner(request))
|
||||
endpoint_url = sess.endpoint_url
|
||||
model = sess.model
|
||||
headers = sess.headers
|
||||
endpoint_url, model, headers = resolve_task_endpoint(
|
||||
sess.endpoint_url, sess.model, sess.headers, owner=_owner(request)
|
||||
)
|
||||
except KeyError:
|
||||
raise HTTPException(404, "Session not found — needed for LLM config")
|
||||
logger.warning("Session %s not found, falling back to utility endpoint", session)
|
||||
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
|
||||
else:
|
||||
endpoint_url, model, headers = resolve_endpoint("utility", owner=_owner(request))
|
||||
endpoint_url, model, headers = resolve_task_endpoint(owner=_owner(request))
|
||||
|
||||
if not endpoint_url or not model:
|
||||
raise HTTPException(400, "No LLM model configured. Set a default model in Settings.")
|
||||
|
||||
@@ -26,7 +26,7 @@ from src.endpoint_resolver import (
|
||||
build_models_url,
|
||||
build_headers,
|
||||
)
|
||||
from src.auth_helpers import _auth_disabled, owner_filter
|
||||
from src.auth_helpers import _auth_disabled, effective_user, owner_filter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -248,6 +248,9 @@ _PROVIDER_CURATED = {
|
||||
"zai-coding": [
|
||||
"glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
|
||||
],
|
||||
"kimi-code": [
|
||||
"kimi-for-coding",
|
||||
],
|
||||
"deepseek": [
|
||||
"deepseek-chat", "deepseek-reasoner",
|
||||
],
|
||||
@@ -315,6 +318,8 @@ def _match_provider_curated(base_url: str, provider: str) -> str:
|
||||
parsed = urlparse(base_url)
|
||||
if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
|
||||
return "zai-coding"
|
||||
if _host_match(base_url, "kimi.com") and "/coding" in (parsed.path or ""):
|
||||
return "kimi-code"
|
||||
for domain, key in _HOST_TO_CURATED:
|
||||
if _host_match(base_url, domain):
|
||||
return key
|
||||
@@ -703,6 +708,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
"""Probe a base URL's /models endpoint and return list of model IDs.
|
||||
For Anthropic, queries their /v1/models API, falling back to hardcoded list."""
|
||||
from src.endpoint_resolver import resolve_url
|
||||
from src.llm_core import httpx_get_kimi_aware
|
||||
base = resolve_url(_normalize_base(base_url))
|
||||
provider = _safe_detect_provider(base)
|
||||
if provider == "chatgpt-subscription":
|
||||
@@ -738,7 +744,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
url = _safe_build_models_url(base)
|
||||
headers = _safe_build_headers(api_key, base)
|
||||
try:
|
||||
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
r = httpx_get_kimi_aware(url, headers, timeout=timeout, verify=llm_verify())
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
# OpenAI format: {"data": [{"id": "model-name"}]}
|
||||
@@ -754,6 +760,11 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
for _e in _PROVIDER_CURATED.get(_ck, []):
|
||||
if _e not in set(models) and not any(m.startswith(_e) for m in models):
|
||||
models.append(_e)
|
||||
if _host_match(base, "kimi.com") and "/coding" in (urlparse(base).path or ""):
|
||||
_ck = _match_provider_curated(base, None)
|
||||
for _e in _PROVIDER_CURATED.get(_ck, []):
|
||||
if _e not in set(models) and not any(m.startswith(_e) for m in models):
|
||||
models.append(_e)
|
||||
return [m for m in models if _is_chat_model(m)]
|
||||
except httpx.HTTPStatusError as e:
|
||||
if api_key:
|
||||
@@ -870,15 +881,52 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
|
||||
|
||||
def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) -> str:
|
||||
"""Return a provider-aware error message for failed endpoint probes."""
|
||||
"""Return a provider-aware error message for failed endpoint probes.
|
||||
|
||||
Surfaces the URL we actually probed and, when the endpoint looks like
|
||||
LM Studio (port 1234 or hostname match), adds a hint about loading a
|
||||
model and confirming the Developer Server is running. The user previously
|
||||
saw a generic "No models found for that provider/key" with no way to
|
||||
tell whether the URL was wrong, the server was down, or the server was
|
||||
reachable but had no model loaded (issue #25).
|
||||
"""
|
||||
ping = ping or {}
|
||||
error = ping.get("error")
|
||||
from src.endpoint_resolver import build_models_url
|
||||
try:
|
||||
probed = build_models_url(base_url) or base_url
|
||||
except Exception:
|
||||
probed = base_url
|
||||
parsed = urlparse(base_url)
|
||||
host = (parsed.hostname or "").lower()
|
||||
is_ollama = parsed.port == 11434 or "ollama" in host or "ollama" in base_url.lower()
|
||||
is_lmstudio = (
|
||||
parsed.port == 1234
|
||||
or "lmstudio" in host
|
||||
or "lm-studio" in host
|
||||
or "lm_studio" in host
|
||||
)
|
||||
|
||||
if is_lmstudio:
|
||||
parts = [
|
||||
"LM Studio is reachable, but no models were reported.",
|
||||
f"Probed {probed}.",
|
||||
]
|
||||
if error:
|
||||
parts.append(f"Last probe error: {error}.")
|
||||
parts.append(
|
||||
"Open LM Studio, load at least one model, and confirm the "
|
||||
"Developer Server is running on port 1234."
|
||||
)
|
||||
parts.append(
|
||||
"Base URL should be http://localhost:1234/v1 (native) or "
|
||||
"http://host.docker.internal:1234/v1 (Docker)."
|
||||
)
|
||||
return " ".join(parts)
|
||||
|
||||
if is_ollama:
|
||||
parts = ["No Ollama models found for that endpoint."]
|
||||
parts.append(f"Probed {probed}.")
|
||||
if error:
|
||||
parts.append(f"Last probe error: {error}.")
|
||||
parts.append("Check that Ollama is running and that the base URL is correct.")
|
||||
@@ -888,9 +936,9 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
|
||||
return " ".join(parts)
|
||||
|
||||
if error:
|
||||
return f"No models found for that provider/key. Last probe error: {error}."
|
||||
return f"No models found for that provider/key. Probed {probed}. Last probe error: {error}."
|
||||
|
||||
return "No models found for that provider/key."
|
||||
return f"No models found for that provider/key. Probed {probed}."
|
||||
|
||||
|
||||
def _normalize_model_ids(value):
|
||||
@@ -1207,13 +1255,16 @@ def setup_model_routes(model_discovery):
|
||||
# Require auth; "" is the unconfigured single-user mode, treated as
|
||||
# "see everything" by _fetch_models.
|
||||
try:
|
||||
from src.auth_helpers import get_current_user as _gcu
|
||||
owner = _gcu(request) or ""
|
||||
except Exception:
|
||||
owner = ""
|
||||
# Reject anonymous in configured deployments — no leaking the model
|
||||
# list to unauthenticated callers.
|
||||
try:
|
||||
if getattr(request.state, "api_token", False):
|
||||
scopes = set(getattr(request.state, "api_token_scopes", []) or [])
|
||||
if "chat" not in scopes:
|
||||
raise HTTPException(403, "API token is not scoped for chat")
|
||||
if not getattr(request.state, "api_token_owner", None):
|
||||
raise HTTPException(403, "API token has no owner")
|
||||
owner = effective_user(request) or ""
|
||||
|
||||
# Reject anonymous in configured deployments — no leaking the model
|
||||
# list to unauthenticated callers.
|
||||
auth_mgr = getattr(request.app.state, "auth_manager", None)
|
||||
if not owner and not _auth_disabled() and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
|
||||
raise HTTPException(401, "Not authenticated")
|
||||
|
||||
@@ -10,7 +10,7 @@ from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.database import SessionLocal, Note
|
||||
from src.auth_helpers import get_current_user
|
||||
from src.auth_helpers import require_user
|
||||
from src.constants import DATA_DIR
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
@@ -208,14 +208,17 @@ async def dispatch_reminder(
|
||||
try:
|
||||
from src.endpoint_resolver import resolve_endpoint
|
||||
from src.llm_core import llm_call_async
|
||||
from src.reminder_personas import synthesis_system_prompt
|
||||
url, model, headers = resolve_endpoint("utility", owner=owner or None)
|
||||
if not url:
|
||||
url, model, headers = resolve_endpoint("default", owner=owner or None)
|
||||
if url and model:
|
||||
persona_id = (settings.get("reminder_llm_persona") or "").strip()
|
||||
sys_prompt = synthesis_system_prompt(persona_id)
|
||||
raw = await llm_call_async(
|
||||
url=url, model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a reminder assistant. Write a single short, warm, motivating sentence (max 25 words) reminding the user about the note below. Do not add greetings, preamble, or hashtags. Output only the sentence."},
|
||||
{"role": "system", "content": sys_prompt},
|
||||
{"role": "user", "content": f"Title: {title}\n\n{note_body}".strip()},
|
||||
],
|
||||
temperature=0.7, max_tokens=200, headers=headers, timeout=30,
|
||||
@@ -567,7 +570,16 @@ def setup_note_routes(task_scheduler=None):
|
||||
router = APIRouter(prefix="/api/notes", tags=["notes"])
|
||||
|
||||
def _owner(request: Request) -> Optional[str]:
|
||||
return get_current_user(request)
|
||||
# require_user, not bare get_current_user: a request that reaches
|
||||
# these owner-scoped routes with NO identity (auth-middleware
|
||||
# regression, SSRF from a sibling service) must fail closed (401)
|
||||
# when auth is configured — not be treated as the single-user mode
|
||||
# and handed blanket access to every account's notes. The documented
|
||||
# anonymous modes (AUTH_ENABLED=false, LOCALHOST_BYPASS on loopback,
|
||||
# unconfigured first-run) still resolve to None, the single-user
|
||||
# path. fire_reminder below already gated this way; the CRUD routes
|
||||
# did not.
|
||||
return require_user(request) or None
|
||||
|
||||
def _is_admin_or_single_user(request: Request, user: str | None) -> bool:
|
||||
if user == "internal-tool":
|
||||
@@ -802,8 +814,7 @@ def setup_note_routes(task_scheduler=None):
|
||||
Returns {synthesis, email_sent}.
|
||||
"""
|
||||
# Gate against anonymous callers — LLM synthesis can burn tokens.
|
||||
from src.auth_helpers import require_user as _ru
|
||||
user = _ru(request)
|
||||
user = require_user(request)
|
||||
body = await request.json()
|
||||
note_id = str(body.get("note_id") or "").strip()
|
||||
if not note_id:
|
||||
@@ -826,6 +837,12 @@ def setup_note_routes(task_scheduler=None):
|
||||
_override["reminder_webhook_integration_id"] = body["webhook_integration_id"]
|
||||
if body.get("webhook_payload_template"):
|
||||
_override["reminder_webhook_payload_template"] = body["webhook_payload_template"]
|
||||
# Mirror the in-UI AI Synthesis toggle + persona so the test
|
||||
# actually exercises the synthesis path before/without a Save.
|
||||
if "llm_synthesis" in body:
|
||||
_override["reminder_llm_synthesis"] = bool(body["llm_synthesis"])
|
||||
if "llm_persona" in body:
|
||||
_override["reminder_llm_persona"] = str(body["llm_persona"] or "")
|
||||
else:
|
||||
db = SessionLocal()
|
||||
try:
|
||||
|
||||
@@ -160,8 +160,11 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
JSON response confirming removal
|
||||
"""
|
||||
try:
|
||||
if not directory:
|
||||
raise HTTPException(400, "Directory path is required")
|
||||
# Confine to PERSONAL_DIR — parity with add_directory_to_rag (which
|
||||
# resolves the path the same way). Without this, an arbitrary or
|
||||
# `..`-escaping path is passed straight to
|
||||
# personal_docs_manager.remove_directory / rag.remove_directory.
|
||||
directory = _resolve_allowed_personal_dir(directory)
|
||||
|
||||
logger.info(f"Removing directory from RAG: {directory}")
|
||||
|
||||
@@ -275,8 +278,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
# Delete file from disk if it's in uploads dir
|
||||
deleted_from_disk = False
|
||||
try:
|
||||
abs_target = os.path.abspath(filepath)
|
||||
base_abs = os.path.abspath(UPLOADS_DIR)
|
||||
abs_target = os.path.realpath(filepath)
|
||||
base_abs = os.path.realpath(UPLOADS_DIR)
|
||||
in_uploads = (
|
||||
abs_target == base_abs
|
||||
or os.path.commonpath([abs_target, base_abs]) == base_abs
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Shell routes — user-facing command execution endpoint."""
|
||||
|
||||
import asyncio
|
||||
import importlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
@@ -14,6 +15,7 @@ from collections import namedtuple
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any
|
||||
from core.platform_compat import IS_APPLE_SILICON, which_tool
|
||||
from src.optional_deps import prepare_optional_dependency_import
|
||||
|
||||
# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
|
||||
# on Windows, so importing them unconditionally crashed app startup there
|
||||
@@ -149,6 +151,11 @@ def _pip_dist_name(pkg: dict) -> str:
|
||||
return (pkg.get("name") or "").replace("_", "-")
|
||||
|
||||
|
||||
def _import_optional_dependency_for_status(name: str):
    """Import the optional dependency *name* and return the module object.

    ``prepare_optional_dependency_import`` is called first, then the module is
    imported by name. Any exception raised by either step propagates to the
    caller.
    """
    prepare_optional_dependency_import(name)
    module = importlib.import_module(name)
    return module
|
||||
|
||||
|
||||
def _package_installed_from_probe(name: str, probe: dict) -> bool:
|
||||
"""Return whether an optional dependency is usable by Cookbook.
|
||||
|
||||
@@ -970,7 +977,6 @@ def setup_shell_routes() -> APIRouter:
|
||||
"""
|
||||
_require_admin(request)
|
||||
_reject_cross_site(request)
|
||||
import importlib
|
||||
import importlib.metadata as importlib_metadata
|
||||
import shlex
|
||||
import json as _json
|
||||
@@ -1057,6 +1063,13 @@ def setup_shell_routes() -> APIRouter:
|
||||
"category": "Image",
|
||||
"target": "remote",
|
||||
},
|
||||
{
|
||||
"name": "transformers",
|
||||
"pip": "transformers",
|
||||
"desc": "Hugging Face model components used by SD/Flux pipelines and image tools",
|
||||
"category": "Image",
|
||||
"target": "remote",
|
||||
},
|
||||
{
|
||||
"name": "rembg",
|
||||
"pip": "rembg[gpu]",
|
||||
@@ -1202,7 +1215,7 @@ def setup_shell_routes() -> APIRouter:
|
||||
pkg["status_note"] = _package_status_note("vllm", probe)
|
||||
else:
|
||||
try:
|
||||
importlib.import_module(pkg["name"])
|
||||
_import_optional_dependency_for_status(pkg["name"])
|
||||
importlib_metadata.version(_pip_dist_name(pkg))
|
||||
pkg["installed"] = True
|
||||
except ImportError:
|
||||
@@ -1251,6 +1264,7 @@ def setup_shell_routes() -> APIRouter:
|
||||
"sglang[all]",
|
||||
"diffusers",
|
||||
"diffusers[torch]",
|
||||
"transformers",
|
||||
"TTS",
|
||||
"bark",
|
||||
"faster-whisper",
|
||||
|
||||
@@ -691,8 +691,12 @@ async def _run_skill_test_once(md: str, task: str, url, model, headers, owner) -
|
||||
{"role": "user", "content": task},
|
||||
]
|
||||
try:
|
||||
# max_tokens explicitly set: passing 0 lets some upstreams (Ollama,
|
||||
# OpenAI-compat) generate an empty completion, which manifested as
|
||||
# the skill test returning nothing while chat (which carries its
|
||||
# preset's max_tokens) worked. 4096 matches the chat default.
|
||||
async for chunk in stream_agent_loop(url, model, messages, headers=headers,
|
||||
temperature=0.3, max_tokens=0, max_rounds=8, owner=owner):
|
||||
temperature=0.3, max_tokens=4096, max_rounds=8, owner=owner):
|
||||
if not chunk.startswith("data: ") or chunk.strip() == "data: [DONE]":
|
||||
continue
|
||||
try:
|
||||
|
||||
@@ -151,6 +151,7 @@ class TaskCreate(BaseModel):
|
||||
endpoint_url: Optional[str] = None
|
||||
then_task_id: Optional[str] = None # chain: run this task after success
|
||||
notifications_enabled: Optional[bool] = None # None lets action-specific defaults apply
|
||||
character_id: Optional[str] = None # built-in persona id (PERSONAS) — biases output voice
|
||||
|
||||
|
||||
class TaskUpdate(BaseModel):
|
||||
@@ -171,6 +172,7 @@ class TaskUpdate(BaseModel):
|
||||
endpoint_url: Optional[str] = None
|
||||
then_task_id: Optional[str] = None
|
||||
notifications_enabled: Optional[bool] = None
|
||||
character_id: Optional[str] = None
|
||||
|
||||
|
||||
def _display_task_name(t: ScheduledTask) -> str:
|
||||
@@ -203,6 +205,7 @@ def _task_to_dict(t: ScheduledTask, include_last_run_result: bool = False) -> di
|
||||
"output_target": t.output_target,
|
||||
"session_id": t.session_id,
|
||||
"crew_member_id": getattr(t, "crew_member_id", None),
|
||||
"character_id": getattr(t, "character_id", None),
|
||||
"model": t.model,
|
||||
"endpoint_url": t.endpoint_url,
|
||||
"run_count": t.run_count or 0,
|
||||
@@ -552,6 +555,7 @@ def setup_task_routes(task_scheduler) -> APIRouter:
|
||||
then_task_id=then_task_id,
|
||||
webhook_token=webhook_token,
|
||||
notifications_enabled=notifications_enabled,
|
||||
character_id=(req.character_id or None),
|
||||
)
|
||||
db.add(task)
|
||||
db.commit()
|
||||
@@ -705,6 +709,9 @@ def setup_task_routes(task_scheduler) -> APIRouter:
|
||||
task.then_task_id = _validate_then_task_id(db, req.then_task_id, user, current_task_id=task.id)
|
||||
if req.notifications_enabled is not None:
|
||||
task.notifications_enabled = bool(req.notifications_enabled)
|
||||
if req.character_id is not None:
|
||||
# Empty string clears the persona; non-empty stores the id.
|
||||
task.character_id = req.character_id or None
|
||||
if req.cron_expression is not None:
|
||||
if req.cron_expression:
|
||||
try:
|
||||
|
||||
@@ -198,6 +198,8 @@ def setup_webhook_routes(
|
||||
"opencode-go": "https://opencode.ai/zen/go/v1",
|
||||
"fireworks": "https://api.fireworks.ai/inference/v1",
|
||||
"venice": "https://api.venice.ai/api/v1",
|
||||
"kimi-code": "https://api.kimi.com/coding/v1",
|
||||
"kimicode": "https://api.kimi.com/coding/v1",
|
||||
}
|
||||
|
||||
# Model prefix → provider mapping for auto-detection
|
||||
@@ -210,6 +212,8 @@ def setup_webhook_routes(
|
||||
"mistral": "mistral",
|
||||
"llama": "groq",
|
||||
"mixtral": "groq",
|
||||
"kimi-for-coding": "kimi-code",
|
||||
"kimi": "kimi-code",
|
||||
}
|
||||
|
||||
def _resolve_base_url(model: Optional[str], provider: Optional[str]) -> Optional[str]:
|
||||
|
||||
@@ -0,0 +1,635 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build a neutral agent migration manifest.
|
||||
|
||||
This helper is intentionally read-only. It does not import the Odysseus
|
||||
application package, write to data/, call an LLM, or apply anything. It turns
|
||||
common agent export shapes into a portable JSON manifest that Odysseus can
|
||||
preview or import later.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import mimetypes
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
# Version tag for the manifest format.
SCHEMA_VERSION = "agent-migration.v1"

# Lowercase file suffixes (leading dot included) in the text-extension set.
TEXT_EXTENSIONS = {
    ".cfg", ".conf", ".csv", ".json", ".log",
    ".md", ".markdown", ".py", ".rst",
    ".toml", ".txt", ".yaml", ".yml",
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class InputWarning:
    """Immutable record of a non-fatal problem tied to one input path."""

    # The input path the warning refers to, stored as a string.
    path: str
    # Human-readable description of what went wrong or was skipped.
    message: str
|
||||
|
||||
|
||||
def utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string ending in ``Z``.

    Microseconds are dropped, so the result looks like
    ``2024-01-02T03:04:05Z``.
    """
    now = datetime.now(timezone.utc).replace(microsecond=0)
    stamp = now.isoformat()
    # isoformat() renders the UTC offset as "+00:00"; use the "Z" shorthand.
    return stamp.replace("+00:00", "Z")
|
||||
|
||||
|
||||
def sha256_text(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text* encoded as UTF-8."""
    encoded = text.encode("utf-8")
    return hashlib.sha256(encoded).hexdigest()
|
||||
|
||||
|
||||
def sha256_bytes(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of the raw bytes *data*."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def sha256_path(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is opened in binary mode and read in 64 KiB chunks, so the whole
    file is never held in memory at once. Errors from opening or reading the
    file propagate to the caller.
    """
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block = stream.read(65536)
            if not block:  # b"" signals end of file
                break
            hasher.update(block)
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def stable_id(kind: str, source_name: str, *parts: Any) -> str:
    """Build a deterministic identifier of the form ``<kind>:<16 hex chars>``.

    *kind*, *source_name* and the string form of every extra *part* are joined
    with the ASCII unit separator (``\\x1f``) and hashed with SHA-256; the
    first 16 hex characters of the digest follow the ``kind:`` prefix.
    """
    fields = [kind, source_name]
    fields.extend(str(part) for part in parts)
    raw = "\x1f".join(fields)
    digest = hashlib.sha256(raw.encode("utf-8")).hexdigest()
    return f"{kind}:{digest[:16]}"
|
||||
|
||||
|
||||
def read_json(path: Path) -> Any:
    """Parse the UTF-8 JSON file at *path* and return the decoded value.

    Errors from opening the file or from JSON decoding propagate to the
    caller.
    """
    with path.open("r", encoding="utf-8") as stream:
        document = json.load(stream)
    return document
|
||||
|
||||
|
||||
def normalize_category(value: Any) -> str:
    """Return *value* as a stripped, lower-cased category name.

    Falsy input, or input that is blank once stripped, yields the default
    category ``"fact"``.
    """
    raw = value if value else "fact"
    cleaned = str(raw).strip().lower()
    return cleaned if cleaned else "fact"
|
||||
|
||||
|
||||
def normalize_memory_text(item: Any) -> str:
    """Extract the stripped text of a memory entry, or ``""`` if there is none.

    A string entry is returned stripped. For a dict entry, the keys ``text``,
    ``content``, ``memory`` and ``value`` are tried in that order and the first
    non-blank string value is returned stripped. Any other input yields ``""``.
    """
    if isinstance(item, str):
        return item.strip()
    if not isinstance(item, dict):
        return ""
    for field in ("text", "content", "memory", "value"):
        candidate = item.get(field)
        if not isinstance(candidate, str):
            continue
        stripped = candidate.strip()
        if stripped:
            return stripped
    return ""
|
||||
|
||||
|
||||
def memory_metadata(item: Any, source_path: Path, index: int) -> dict[str, Any]:
    """Build the metadata dict recorded alongside one memory entry.

    The result always contains ``source_path`` (the path as a string) and
    ``source_index``. When *item* is a dict, each of the keys ``id``,
    ``timestamp``, ``created_at``, ``updated_at``, ``source``, ``tags`` and
    ``pinned`` that it contains is copied over under a ``source_`` prefix.
    """
    metadata: dict[str, Any] = {"source_path": str(source_path), "source_index": index}
    if not isinstance(item, dict):
        return metadata
    passthrough = ("id", "timestamp", "created_at", "updated_at", "source", "tags", "pinned")
    metadata.update(
        {f"source_{name}": item.get(name) for name in passthrough if name in item}
    )
    return metadata
|
||||
|
||||
|
||||
def payload_items(payload: Any, keys: tuple[str, ...]) -> Any:
    """Unwrap a list stored under one of *keys* in a dict payload.

    When *payload* is a dict, *keys* are tried in order and the first value
    that is a list is returned. In every other case (not a dict, or no key
    holds a list) *payload* itself is returned unchanged.
    """
    if not isinstance(payload, dict):
        return payload
    for name in keys:
        candidate = payload.get(name)
        if isinstance(candidate, list):
            return payload[name]
    return payload
|
||||
|
||||
|
||||
def collect_memory_json(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Read a memory export and turn each usable entry into a manifest item.

    The JSON may be a list, or an object holding a list under ``memories``,
    ``memory``, ``items`` or ``data``. Entries without text and entries whose
    lower-cased text was already seen in this file are skipped with a warning.

    Returns ``(items, warnings)``. A file that cannot be read or has the wrong
    shape yields no items and a single warning.
    """
    location = str(path)
    try:
        payload = read_json(path)
    except Exception as exc:
        return [], [InputWarning(location, f"could not read JSON: {exc}")]

    entries = payload_items(payload, ("memories", "memory", "items", "data"))
    if not isinstance(entries, list):
        return [], [InputWarning(location, "expected a JSON list or an object containing a memory list")]

    collected: list[dict[str, Any]] = []
    warnings: list[InputWarning] = []
    seen_digests: set[str] = set()
    for index, entry in enumerate(entries):
        text = normalize_memory_text(entry)
        if not text:
            warnings.append(InputWarning(location, f"skipped memory at index {index}: missing text"))
            continue

        # Duplicates are detected case-insensitively on the trimmed text.
        digest = sha256_text(text.strip().lower())
        if digest in seen_digests:
            warnings.append(InputWarning(location, f"skipped duplicate memory at index {index}"))
            continue
        seen_digests.add(digest)

        if isinstance(entry, dict):
            category = normalize_category(entry.get("category"))
            source = str(entry.get("source") or source_name)
        else:
            category = normalize_category("fact")
            source = source_name

        collected.append(
            {
                "id": stable_id("memory", source_name, path, index, digest),
                "kind": "memory",
                "text": text,
                "category": category,
                "source": source,
                "metadata": memory_metadata(entry, path, index),
            }
        )
    return collected, warnings
|
||||
|
||||
|
||||
def normalize_timestamp(value: Any) -> str | None:
|
||||
if value is None or value == "":
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
try:
|
||||
return (
|
||||
datetime.fromtimestamp(float(value), timezone.utc)
|
||||
.replace(microsecond=0)
|
||||
.isoformat()
|
||||
.replace("+00:00", "Z")
|
||||
)
|
||||
except (OverflowError, OSError, ValueError):
|
||||
return str(value)
|
||||
return str(value)
|
||||
|
||||
|
||||
def normalize_role(value: Any) -> str:
    """Map a speaker label to a canonical role name.

    The label is trimmed and lower-cased. ``human`` and ``user`` become
    ``"user"``; ``assistant``, ``ai``, ``bot`` and ``model`` become
    ``"assistant"``. Any other non-blank label (including ``system`` and
    ``tool``) is returned as-is; falsy or blank input yields ``"unknown"``.
    """
    aliases = {
        "human": "user",
        "user": "user",
        "assistant": "assistant",
        "ai": "assistant",
        "bot": "assistant",
        "model": "assistant",
    }
    label = str(value if value else "unknown").strip().lower()
    if not label:
        return "unknown"
    return aliases.get(label, label)
|
||||
|
||||
|
||||
def content_part_text(part: Any) -> str:
    """Return the text carried by one content part.

    A string part is returned unchanged. For a dict, the keys ``text``,
    ``content`` and ``value`` are tried in that order and the first string
    value is returned. Anything else yields ``""``.
    """
    if isinstance(part, str):
        return part
    if isinstance(part, dict):
        # A ``{"type": "text", "text": ...}`` part is covered by the "text"
        # lookup here; a separate check for it could never be reached.
        for key in ("text", "content", "value"):
            value = part.get(key)
            if isinstance(value, str):
                return value
    return ""
|
||||
|
||||
|
||||
def normalize_message_text(message: dict[str, Any]) -> str:
    """Extract the text of a message from the shapes its ``content`` may take.

    * string ``content``: returned as-is;
    * list ``content``: the stripped, non-empty text of each part joined by
      newlines;
    * dict ``content``: its ``parts`` list joined the same way, otherwise the
      first string under ``text``, ``content`` or ``value``.

    When none of these produce a result, the first string found under the
    message's own ``text``, ``body`` or ``message`` key is returned, else ``""``.
    """

    def join_parts(parts: list[Any]) -> str:
        # Blank parts are dropped so they do not create empty lines.
        stripped = (content_part_text(part).strip() for part in parts)
        return "\n".join(piece for piece in stripped if piece)

    content = message.get("content")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return join_parts(content)
    if isinstance(content, dict):
        nested_parts = content.get("parts")
        if isinstance(nested_parts, list):
            return join_parts(nested_parts)
        for key in ("text", "content", "value"):
            nested = content.get(key)
            if isinstance(nested, str):
                return nested

    for key in ("text", "body", "message"):
        fallback = message.get(key)
        if isinstance(fallback, str):
            return fallback
    return ""
|
||||
|
||||
|
||||
def normalize_message(message: dict[str, Any]) -> dict[str, Any] | None:
    """Convert one raw message into ``{"role", "text", ...}`` or ``None``.

    The role is the first truthy value among the message's ``role``,
    ``sender`` and ``speaker`` keys, then the ``role`` and ``name`` of a dict
    ``author``. A message whose text is blank after stripping yields ``None``.
    ``created_at`` and ``source_id`` are added only when a timestamp or an
    ``id`` is available.
    """
    raw_author = message.get("author")
    author = raw_author if isinstance(raw_author, dict) else {}
    direct_role = message.get("role") or message.get("sender") or message.get("speaker")
    role = direct_role or author.get("role") or author.get("name")

    text = normalize_message_text(message).strip()
    if not text:
        return None

    normalized: dict[str, Any] = {"role": normalize_role(role), "text": text}

    raw_timestamp = message.get("created_at") or message.get("create_time") or message.get("timestamp")
    timestamp = normalize_timestamp(raw_timestamp)
    if timestamp:
        normalized["created_at"] = timestamp

    message_id = message.get("id")
    if message_id is not None:
        normalized["source_id"] = str(message_id)
    return normalized
|
||||
|
||||
|
||||
def chatgpt_mapping_messages(conversation: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the normalized messages of a conversation's ``mapping`` dict.

    Each mapping value that is a dict with a dict ``message`` is normalized.
    Results are ordered by the message's ``create_time`` as a float, falling
    back to the node's position in the mapping when it cannot be converted.
    Ties keep mapping order. A missing or non-dict ``mapping`` yields ``[]``.
    """
    mapping = conversation.get("mapping")
    if not isinstance(mapping, dict):
        return []

    ordered: list[tuple[float, int, dict[str, Any]]] = []
    for position, node in enumerate(mapping.values()):
        message = node.get("message") if isinstance(node, dict) else None
        if not isinstance(message, dict):
            continue
        try:
            when = float(message.get("create_time"))
        except (TypeError, ValueError):
            when = float(position)
        normalized = normalize_message(message)
        if normalized:
            ordered.append((when, position, normalized))

    ordered.sort(key=lambda row: (row[0], row[1]))
    return [normalized for _, _, normalized in ordered]
|
||||
|
||||
|
||||
def conversation_messages(conversation: dict[str, Any]) -> tuple[list[dict[str, Any]], str]:
    """Return ``(messages, format_hint)`` for one conversation object.

    A non-empty result from the ``mapping`` structure wins and is labelled
    ``"chatgpt_mapping"``. Otherwise the first of ``messages``,
    ``chat_messages`` or ``turns`` that holds a list is used, even if no
    message in it normalizes, and the hint is that key. With none of these the
    result is ``([], "unknown")``.
    """
    mapped = chatgpt_mapping_messages(conversation)
    if mapped:
        return mapped, "chatgpt_mapping"

    for key in ("messages", "chat_messages", "turns"):
        raw_messages = conversation.get(key)
        if not isinstance(raw_messages, list):
            continue
        messages = []
        for raw in raw_messages:
            if not isinstance(raw, dict):
                continue
            normalized = normalize_message(raw)
            if normalized:
                messages.append(normalized)
        return messages, key
    return [], "unknown"
|
||||
|
||||
|
||||
def conversation_title(conversation: dict[str, Any], index: int) -> str:
    """Pick a display title for a conversation.

    The first non-blank string under ``title``, ``name`` or ``summary`` is
    returned stripped; otherwise ``"Conversation <index + 1>"``.
    """
    for key in ("title", "name", "summary"):
        candidate = conversation.get(key)
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            return cleaned
    return f"Conversation {index + 1}"
|
||||
|
||||
|
||||
def collect_conversation_json(
    path: Path,
    source_name: str,
    *,
    include_content: bool = False,
    max_messages: int = 2000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Read a conversation export and build one manifest item per thread.

    The JSON may be a list of conversations, a single conversation object, or
    an object holding a list under ``conversations``, ``conversation``,
    ``items`` or ``data``. Entries that are not objects or contain no text
    messages are skipped with a warning.

    Every item carries metadata, including a SHA-256 of the ``role:text``
    transcript. The normalized messages are embedded only when
    ``include_content`` is true and the thread has at most ``max_messages``
    messages; a longer thread keeps its metadata and adds a warning.

    Returns ``(items, warnings)``.
    """
    location = str(path)
    try:
        payload = read_json(path)
    except Exception as exc:
        return [], [InputWarning(location, f"could not read JSON: {exc}")]

    payload = payload_items(payload, ("conversations", "conversation", "items", "data"))
    if isinstance(payload, dict):
        # A lone conversation object is treated as a one-element list.
        payload = [payload]
    if not isinstance(payload, list):
        return [], [InputWarning(location, "expected a JSON list or an object containing a conversation list")]

    items: list[dict[str, Any]] = []
    warnings: list[InputWarning] = []
    for index, conversation in enumerate(payload):
        if not isinstance(conversation, dict):
            warnings.append(InputWarning(location, f"skipped conversation at index {index}: expected object"))
            continue
        messages, format_hint = conversation_messages(conversation)
        if not messages:
            warnings.append(InputWarning(location, f"skipped conversation at index {index}: no text messages found"))
            continue

        title = conversation_title(conversation, index)
        source_id = conversation.get("id") or conversation.get("uuid") or conversation.get("conversation_id")
        transcript = "\n".join(f"{msg['role']}:{msg['text']}" for msg in messages)
        text_digest = sha256_text(transcript)

        metadata: dict[str, Any] = {
            "source_path": location,
            "source_index": index,
            "source_format": format_hint,
            "message_count": len(messages),
            "text_sha256": text_digest,
            "content_included": False,
        }
        if source_id is not None:
            metadata["source_id"] = str(source_id)
        for key in ("create_time", "created_at", "update_time", "updated_at"):
            timestamp = normalize_timestamp(conversation.get(key))
            if timestamp:
                metadata[f"source_{key}"] = timestamp

        item: dict[str, Any] = {
            "id": stable_id("conversation", source_name, path, source_id or index, text_digest),
            "kind": "conversation_thread",
            "title": title,
            "source": source_name,
            "metadata": metadata,
        }

        if include_content:
            if len(messages) <= max_messages:
                item["messages"] = messages
                metadata["content_included"] = True
            else:
                warnings.append(
                    InputWarning(
                        location,
                        f"skipped conversation content at index {index}: over {max_messages} messages",
                    )
                )
        items.append(item)
    return items, warnings
|
||||
|
||||
|
||||
def parse_skill_frontmatter(text: str) -> dict[str, Any]:
    """Parse the leading ``---`` front-matter block of a skill file.

    Only flat ``key: value`` lines are understood. Blank lines, lines starting
    with ``#`` and lines without a colon are ignored. Surrounding double and
    single quotes are stripped from values. Text that does not start with
    ``---`` or has no closing ``---`` line yields ``{}``.
    """
    if not text.startswith("---"):
        return {}
    closing = text.find("\n---", 3)
    if closing < 0:
        return {}

    frontmatter: dict[str, Any] = {}
    header = text[3:closing].strip()
    for raw_line in header.splitlines():
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue
        # Split on the first colon only, so values may contain colons.
        name, separator, rest = raw_line.partition(":")
        if not separator:
            continue
        name = name.strip()
        if name:
            frontmatter[name] = rest.strip().strip('"').strip("'")
    return frontmatter
|
||||
|
||||
|
||||
def collect_skill_dir(path: Path, source_name: str) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Collect every ``SKILL.md`` found recursively under ``path``.

    ``path`` must be an existing directory that is not a symlink; otherwise no
    items are returned and a single warning explains why. Symlinked or
    unreadable skill files are skipped with a warning. A skill's name comes
    from its front-matter ``name``, falling back to the parent directory name.

    Returns ``(items, warnings)``.
    """
    location = str(path)
    if path.is_symlink():
        return [], [InputWarning(location, "skills path is a symlink; skipped")]
    if not path.exists():
        return [], [InputWarning(location, "skills directory does not exist")]
    if not path.is_dir():
        return [], [InputWarning(location, "skills path is not a directory")]

    items: list[dict[str, Any]] = []
    warnings: list[InputWarning] = []
    for skill_path in sorted(path.rglob("SKILL.md")):
        skill_location = str(skill_path)
        if skill_path.is_symlink():
            warnings.append(InputWarning(skill_location, "skipped symlinked skill file"))
            continue
        try:
            text = skill_path.read_text(encoding="utf-8")
        except Exception as exc:
            warnings.append(InputWarning(skill_location, f"could not read skill: {exc}"))
            continue

        frontmatter = parse_skill_frontmatter(text)
        folder_name = skill_path.parent.name
        name = str(frontmatter.get("name") or folder_name).strip() or folder_name
        # The same digest feeds both the stable id and the metadata.
        digest = sha256_text(text)
        items.append(
            {
                "id": stable_id("skill", source_name, skill_path, digest),
                "kind": "skill",
                "name": name,
                "category": str(frontmatter.get("category") or "general"),
                "source": source_name,
                "format": "SKILL.md",
                "content": text,
                "metadata": {
                    "source_path": skill_location,
                    "sha256": digest,
                    "frontmatter": frontmatter,
                },
            }
        )
    return items, warnings
|
||||
|
||||
|
||||
def looks_textual(path: Path) -> bool:
    """Tell whether ``path`` looks like a text file, judging by its name only.

    True when the lower-cased suffix is in ``TEXT_EXTENSIONS``, or when
    ``mimetypes`` guesses a ``text/*`` type or ``application/json``.
    """
    if path.suffix.lower() in TEXT_EXTENSIONS:
        return True
    mime_type = mimetypes.guess_type(str(path))[0]
    if not mime_type:
        return False
    return mime_type.startswith("text/") or mime_type in {"application/json"}
|
||||
|
||||
|
||||
def iter_archive_dir(path: Path) -> Iterable[Path | InputWarning]:
    """Yield the regular files below ``path`` in sorted order, depth-first.

    Symlinked children are not followed; an ``InputWarning`` is yielded for
    each one instead. A directory that cannot be listed yields a single
    ``InputWarning`` and nothing else.
    """
    try:
        children = sorted(path.iterdir())
    except Exception as exc:
        yield InputWarning(str(path), f"could not scan archive directory: {exc}")
        return

    for child in children:
        if child.is_symlink():
            yield InputWarning(str(child), "skipped symlinked archive path")
        elif child.is_file():
            yield child
        elif child.is_dir():
            yield from iter_archive_dir(child)
|
||||
|
||||
|
||||
def iter_archive_files(paths: Iterable[Path]) -> Iterable[Path | InputWarning]:
    """Yield the files named by, or contained in, each of ``paths``.

    A file path is yielded directly and a directory is expanded with
    ``iter_archive_dir``. A symlinked path yields an ``InputWarning`` and is
    not followed. Paths that are none of these produce nothing.
    """
    for candidate in paths:
        if candidate.is_symlink():
            yield InputWarning(str(candidate), "skipped symlinked archive path")
        elif candidate.is_file():
            yield candidate
        elif candidate.is_dir():
            yield from iter_archive_dir(candidate)
|
||||
|
||||
|
||||
def collect_archive_paths(
    paths: list[Path],
    source_name: str,
    *,
    include_content: bool = False,
    max_bytes: int = 256_000,
) -> tuple[list[dict[str, Any]], list[InputWarning]]:
    """Describe text-like files under ``paths`` as archive-document items.

    Top-level paths that are symlinks, missing, or neither file nor directory
    are skipped with a warning. Each remaining file that ``looks_textual``
    accepts gets an item with its size and SHA-256. When ``include_content``
    is true, files of at most ``max_bytes`` also get their UTF-8 text embedded,
    decoded with replacement characters if strict decoding fails. Larger files
    keep their metadata and add a warning.

    Returns ``(items, warnings)``.
    """
    warnings: list[InputWarning] = []
    items: list[dict[str, Any]] = []

    def warn(target: Path, message: str) -> None:
        warnings.append(InputWarning(str(target), message))

    usable: list[Path] = []
    for candidate in paths:
        if candidate.is_symlink():
            warn(candidate, "archive path is a symlink; skipped")
        elif not candidate.exists():
            warn(candidate, "archive path does not exist")
        elif not (candidate.is_file() or candidate.is_dir()):
            warn(candidate, "archive path is not a file or directory")
        else:
            usable.append(candidate)

    for entry in iter_archive_files(usable):
        if isinstance(entry, InputWarning):
            warnings.append(entry)
            continue
        if not looks_textual(entry):
            warn(entry, "skipped non-text archive file")
            continue
        try:
            size = entry.stat().st_size
        except Exception as exc:
            warn(entry, f"could not stat archive file: {exc}")
            continue
        try:
            file_hash = sha256_path(entry)
        except Exception as exc:
            warn(entry, f"could not hash archive file: {exc}")
            continue

        oversized = size > max_bytes
        if include_content and oversized:
            # The item is still recorded below, just without its content.
            warn(entry, f"skipped archive content over {max_bytes} bytes")

        metadata: dict[str, Any] = {
            "source_path": str(entry),
            "size_bytes": size,
            "sha256": file_hash,
        }
        archive_item: dict[str, Any] = {
            "id": stable_id("archive", source_name, entry, file_hash),
            "kind": "archive_document",
            "title": entry.name,
            "source": source_name,
            "metadata": metadata,
        }
        if include_content and not oversized:
            try:
                archive_item["content"] = entry.read_text(encoding="utf-8")
            except UnicodeDecodeError:
                archive_item["content"] = entry.read_text(encoding="utf-8", errors="replace")
                metadata["decoded_with_replacement"] = True
        items.append(archive_item)
    return items, warnings
|
||||
|
||||
|
||||
def build_manifest(args) -> dict[str, Any]:
    """Assemble the full manifest dict from the parsed command-line ``args``.

    Inputs are collected in a fixed order: memory JSON files, skill
    directories, conversation JSON files, then archive paths. The result holds
    the schema version, a generation timestamp, the source name and kind, a
    summary with per-kind counts, every item, and every warning as a
    ``{"path", "message"}`` dict.
    """
    items: list[dict[str, Any]] = []
    warnings: list[InputWarning] = []

    def absorb(result: tuple[list[dict[str, Any]], list[InputWarning]]) -> None:
        collected, got_warnings = result
        items.extend(collected)
        warnings.extend(got_warnings)

    for memory_path in args.memory_json:
        absorb(collect_memory_json(memory_path, args.source_name))

    for skills_path in args.skills_dir:
        absorb(collect_skill_dir(skills_path, args.source_name))

    for conversation_path in args.conversation_json:
        absorb(
            collect_conversation_json(
                conversation_path,
                args.source_name,
                include_content=args.include_conversation_content,
                max_messages=args.max_conversation_messages,
            )
        )

    if args.archive:
        absorb(
            collect_archive_paths(
                args.archive,
                args.source_name,
                include_content=args.include_archive_content,
                max_bytes=args.max_archive_bytes,
            )
        )

    counts: dict[str, int] = {}
    for item in items:
        kind = item["kind"]
        counts[kind] = counts.get(kind, 0) + 1

    return {
        "schema_version": SCHEMA_VERSION,
        "generated_at": utc_now_iso(),
        "source": {
            "name": args.source_name,
            "kind": args.source_kind,
        },
        "summary": {
            "item_count": len(items),
            "counts_by_kind": counts,
            "warning_count": len(warnings),
        },
        "items": items,
        "warnings": [{"path": warning.path, "message": warning.message} for warning in warnings],
    }
|
||||
|
||||
|
||||
def parse_args(argv: list[str] | None = None):
|
||||
parser = argparse.ArgumentParser(description="Build a neutral Odysseus agent migration manifest.")
|
||||
parser.add_argument("--source-name", default="agent-export", help="Human-readable source name.")
|
||||
parser.add_argument("--source-kind", default="generic", help="Source adapter kind, e.g. generic, openclaw, hermes.")
|
||||
parser.add_argument(
|
||||
"--memory-json",
|
||||
action="append",
|
||||
type=Path,
|
||||
default=[],
|
||||
help="JSON memory export. May be a list, or an object containing memories/items/data.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--skills-dir",
|
||||
action="append",
|
||||
type=Path,
|
||||
default=[],
|
||||
help="Directory containing SKILL.md files. Scanned recursively.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--archive",
|
||||
action="append",
|
||||
type=Path,
|
||||
default=[],
|
||||
help="Text/Markdown/JSON file or directory to preserve as archive documents.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--conversation-json",
|
||||
action="append",
|
||||
type=Path,
|
||||
default=[],
|
||||
help="Conversation export JSON. Supports generic message lists and ChatGPT-style conversations.json.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-archive-content",
|
||||
action="store_true",
|
||||
help="Embed archive document content in the manifest. By default only metadata is included.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-archive-bytes",
|
||||
type=int,
|
||||
default=256_000,
|
||||
help="Maximum bytes to embed per archive file when --include-archive-content is used.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--include-conversation-content",
|
||||
action="store_true",
|
||||
help="Embed normalized conversation messages. By default only thread metadata is included.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-conversation-messages",
|
||||
type=int,
|
||||
default=2000,
|
||||
help="Maximum messages to embed per conversation when --include-conversation-content is used.",
|
||||
)
|
||||
parser.add_argument("--output", type=Path, help="Write manifest JSON to this path instead of stdout.")
|
||||
parser.add_argument("--compact", action="store_true", help="Write compact JSON without indentation.")
|
||||
return parser.parse_args(argv)
|
||||
|
||||
|
||||
def main(argv: list[str] | None = None) -> int:
    """Build the manifest and write it as JSON; return exit status 0.

    With ``--compact`` the JSON has no indentation and no trailing newline;
    otherwise it is indented by two spaces and ends with a newline. Keys are
    always sorted. Output goes to ``--output`` (parent directories are
    created) or to standard output.
    """
    args = parse_args(argv)
    manifest = build_manifest(args)

    if args.compact:
        text = json.dumps(manifest, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    else:
        text = json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True) + "\n"

    destination = args.output
    if destination:
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding="utf-8")
    else:
        sys.stdout.write(text)
    return 0
|
||||
|
||||
|
||||
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
@@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Backfill release_date on entries in services/hwfit/data/hf_models.json.
|
||||
|
||||
Why: the `newest` sort in the cookbook ranks rows by release_date. Anything
|
||||
missing a date sorts to the bottom. This script pulls `created_at` from the
|
||||
HuggingFace API for each catalog entry without one (or all entries when
|
||||
--refresh is passed) and writes the catalog back.
|
||||
|
||||
Usage:
|
||||
python scripts/backfill_model_release_dates.py # missing only
|
||||
python scripts/backfill_model_release_dates.py --refresh # all entries
|
||||
python scripts/backfill_model_release_dates.py --limit 50 # cap requests
|
||||
python scripts/backfill_model_release_dates.py --dry-run # show, don't write
|
||||
|
||||
Auth: set HF_TOKEN env var (or huggingface-cli login) to access gated repos.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from huggingface_hub import HfApi
|
||||
from huggingface_hub.utils import HfHubHTTPError
|
||||
except ImportError:
|
||||
print("Install huggingface_hub: pip install huggingface_hub", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
# The catalog lives at <parent of this script's directory>/services/hwfit/data/hf_models.json.
CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath("services", "hwfit", "data", "hf_models.json")
|
||||
|
||||
|
||||
def fetch_release_date(api: HfApi, repo_id: str) -> str | None:
    """Return the repo's ``created_at`` as ``YYYY-MM-DD``, or ``None``.

    ``None`` is returned when the lookup fails (the failure is reported on
    stderr) or when the returned info has no ``created_at`` value.
    """
    try:
        info = api.model_info(repo_id, files_metadata=False)
    except HfHubHTTPError as err:
        # 401 = gated/private, 404 = renamed/deleted. Either way, no date.
        response = getattr(err, "response", None)
        status = getattr(response, "status_code", None)
        print(f" {repo_id}: HTTP {status or '?'}", file=sys.stderr)
        return None
    except Exception as err:
        print(f" {repo_id}: {type(err).__name__}: {err}", file=sys.stderr)
        return None

    created = getattr(info, "created_at", None)
    return created.strftime("%Y-%m-%d") if created else None
|
||||
|
||||
|
||||
def main():
    """Backfill ``release_date`` on catalog entries from the HuggingFace API.

    Entries without a ``name`` are ignored. By default only entries with no
    ``release_date`` are looked up; ``--refresh`` looks up every named entry.
    The catalog is rewritten (temp file, then rename) only when at least one
    entry changed and ``--dry-run`` was not given.

    Exits with status 2 when the catalog file does not exist.
    """
    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--refresh", action="store_true", help="Overwrite existing release_date too (default: only fill missing).")
    p.add_argument("--limit", type=int, default=0, help="Stop after N API calls (0 = no limit).")
    p.add_argument("--dry-run", action="store_true", help="Don't write back; just report.")
    p.add_argument("--sleep", type=float, default=0.05, help="Seconds to sleep between requests (default 0.05).")
    args = p.parse_args()

    # Only positive values are meaningful. A negative limit would otherwise
    # slice targets off the END of the list, and a negative sleep would make
    # time.sleep() raise mid-run, discarding every date fetched so far.
    limit = args.limit if args.limit > 0 else 0
    delay = args.sleep if args.sleep > 0 else 0.0

    if not CATALOG_PATH.exists():
        print(f"Catalog not found: {CATALOG_PATH}", file=sys.stderr)
        sys.exit(2)

    with CATALOG_PATH.open(encoding="utf-8") as f:
        catalog = json.load(f)

    # Indices of the entries that need a lookup.
    candidates = []
    for i, m in enumerate(catalog):
        name = m.get("name")
        if not name:
            continue
        existing = (m.get("release_date") or "").strip()
        if existing and not args.refresh:
            continue
        candidates.append(i)

    if limit:
        candidates = candidates[:limit]

    mode = "refresh all" if args.refresh else "missing only"
    cap_note = f", capped at {limit}" if limit else ""
    print(f"Catalog: {CATALOG_PATH}")
    print(f"Total entries: {len(catalog)}")
    print(f"Targets ({mode}{cap_note}): {len(candidates)}")
    if not candidates:
        print("Nothing to do.")
        return

    api = HfApi(token=os.environ.get("HF_TOKEN") or None)
    updated = 0
    skipped = 0
    # Monotonic clock: the elapsed time is unaffected by wall-clock changes.
    started = time.monotonic()
    for n, idx in enumerate(candidates, start=1):
        entry = catalog[idx]
        name = entry["name"]
        old = (entry.get("release_date") or "").strip()
        new = fetch_release_date(api, name)
        if new is None:
            skipped += 1
            tag = "skip"
        elif new == old:
            tag = "unchanged"
        else:
            entry["release_date"] = new
            updated += 1
            tag = f"set {new}" + (f" (was {old})" if old else "")
        print(f"[{n}/{len(candidates)}] {name} — {tag}")
        if delay:
            time.sleep(delay)

    elapsed = time.monotonic() - started
    print()
    print(f"Done in {elapsed:.1f}s — {updated} updated, {skipped} skipped (HF unavailable / gated / missing date).")

    if args.dry_run:
        print("Dry run — no write.")
        return

    if updated:
        # Atomic write: tmp file in the same dir, then rename. Keeps the
        # catalog usable even if the process dies mid-write.
        tmp = CATALOG_PATH.with_suffix(".json.tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(catalog, f, indent=1, ensure_ascii=False)
            f.write("\n")
        tmp.replace(CATALOG_PATH)
        print(f"Wrote {CATALOG_PATH}")
    else:
        print("No changes to write.")
|
||||
|
||||
|
||||
# Run the backfill only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,341 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Import models from the upstream vllm-project/recipes catalog into our
|
||||
local hf_models.json. Two modes:
|
||||
|
||||
--update-existing Stamp min_vllm_version + vllm_recipe=True on rows we
|
||||
already carry. Cheap, no HF API calls.
|
||||
--add-missing Create new catalog rows for every recipe model we
|
||||
don't carry. Hits the HF API for created_at + downloads
|
||||
(~1 req per missing model, paced).
|
||||
|
||||
Both modes write atomically (tmp + rename) so a crashed run leaves the
|
||||
catalog intact. Default with no mode flags runs both, prefer to pass them
|
||||
explicitly.
|
||||
|
||||
Usage:
|
||||
python scripts/import_from_vllm_recipes.py --update-existing
|
||||
python scripts/import_from_vllm_recipes.py --add-missing
|
||||
python scripts/import_from_vllm_recipes.py --dry-run
|
||||
python scripts/import_from_vllm_recipes.py --limit 10
|
||||
|
||||
Auth: set HF_TOKEN to access gated repos when --add-missing.
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import httpx
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("pip install httpx PyYAML", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
from huggingface_hub import HfApi
|
||||
from huggingface_hub.utils import HfHubHTTPError
|
||||
except ImportError:
|
||||
HfApi = None
|
||||
HfHubHTTPError = Exception
|
||||
|
||||
|
||||
# The local catalog lives at
# <parent of this script's directory>/services/hwfit/data/hf_models.json.
CATALOG_PATH = Path(__file__).resolve().parent.parent.joinpath("services", "hwfit", "data", "hf_models.json")

# GitHub git-trees API URL for the recursive file listing of the recipes repo.
RECIPES_TREE_URL = "https://api.github.com/repos/vllm-project/recipes/git/trees/main?recursive=1"

# Raw-file URL template for a single recipe; {repo} is the "org/name" model id.
RECIPE_RAW_URL = "https://raw.githubusercontent.com/vllm-project/recipes/main/models/{repo}.yaml"
|
||||
|
||||
|
||||
# Map recipe `precision` to the closest catalog `quantization` label that
|
||||
# fit.py / models.py already understand.
|
||||
_PRECISION_TO_QUANT = {
|
||||
"fp8": "FP8",
|
||||
"nvfp4": "NVFP4",
|
||||
"mxfp4": "MXFP4",
|
||||
"bf16": "BF16",
|
||||
"fp16": "F16",
|
||||
"f16": "F16",
|
||||
"fp4": "FP4",
|
||||
"int8": "INT8",
|
||||
"int4": "INT4",
|
||||
"awq-4bit": "AWQ-4bit",
|
||||
"awq-8bit": "AWQ-8bit",
|
||||
}
|
||||
|
||||
# Architecture name → use_case fallback. fit.py weights use_case for filtering;
|
||||
# missing field defaults to a generic bucket.
|
||||
_ARCH_USE_CASE = {
|
||||
"moe": "General-purpose reasoning, long-context",
|
||||
"llama": "General-purpose chat",
|
||||
"qwen2": "General-purpose chat",
|
||||
"qwen3": "General-purpose reasoning",
|
||||
"deepseek_v3_moe": "General-purpose reasoning, long-context",
|
||||
"deepseek_v4_moe": "General-purpose reasoning, long-context",
|
||||
}
|
||||
|
||||
|
||||
def _parse_param_count(s) -> int:
|
||||
"""'230B' / '8.6B' / '4.2T' → integer parameter count."""
|
||||
if s is None:
|
||||
return 0
|
||||
s = str(s).strip().replace(",", "")
|
||||
m = re.match(r"^([\d.]+)\s*([KMBT]?)$", s, re.I)
|
||||
if not m:
|
||||
return 0
|
||||
num = float(m.group(1))
|
||||
unit = (m.group(2) or "").upper()
|
||||
mult = {"K": 1e3, "M": 1e6, "B": 1e9, "T": 1e12, "": 1.0}[unit]
|
||||
return int(num * mult)
|
||||
|
||||
|
||||
def _capabilities_for(arch: str, hardware: dict, ctx_len: int, has_reasoning: bool) -> list[str]:
|
||||
caps = []
|
||||
if "moe" in (arch or "").lower():
|
||||
caps.append("moe")
|
||||
if has_reasoning:
|
||||
caps.append("reasoning")
|
||||
if ctx_len and ctx_len >= 100_000:
|
||||
caps.append("long_context")
|
||||
if any(hw in (hardware or {}) for hw in ("mi300x", "mi325x", "mi350x", "mi355x")):
|
||||
caps.append("amd_supported")
|
||||
return caps
|
||||
|
||||
|
||||
def _fetch_manifest(client: httpx.Client) -> set[str]:
    """Return the set of ``org/name`` ids that have a recipe file upstream.

    Requests ``RECIPES_TREE_URL`` and keeps every tree path of the form
    ``models/<org>/<name>.yaml``. Paths directly under ``models/`` (no slash
    in the remainder) are ignored. A non-success response raises via
    ``raise_for_status``.
    """
    response = client.get(RECIPES_TREE_URL, headers={"Accept": "application/vnd.github+json"}, timeout=15)
    response.raise_for_status()
    entries = (response.json() or {}).get("tree") or []

    prefix, suffix = "models/", ".yaml"
    repos: set[str] = set()
    for entry in entries:
        path = (entry or {}).get("path") or ""
        if not (path.startswith(prefix) and path.endswith(suffix)):
            continue
        repo = path[len(prefix):-len(suffix)]
        if "/" in repo:
            repos.add(repo)
    return repos
|
||||
|
||||
|
||||
def _fetch_recipe(client: httpx.Client, repo: str) -> dict | None:
    """Download and parse the recipe YAML for ``repo``.

    Returns the parsed document (``{}`` when the file parses to nothing), or
    ``None`` when the response status is not 200 or the request or parsing
    raises. Failures are deliberately not reported here.
    """
    url = RECIPE_RAW_URL.format(repo=repo)
    try:
        response = client.get(url, timeout=10)
        if response.status_code == 200:
            return yaml.safe_load(response.text) or {}
        return None
    except Exception:
        return None
|
||||
|
||||
|
||||
def _stamp_from_recipe(entry: dict, recipe: dict) -> bool:
|
||||
"""Mutate entry with recipe-derived fields. Returns True if anything changed."""
|
||||
model = recipe.get("model") or {}
|
||||
meta = recipe.get("meta") or {}
|
||||
features = recipe.get("features") or {}
|
||||
|
||||
changed = False
|
||||
new_min = (model.get("min_vllm_version") or "").strip()
|
||||
if new_min and entry.get("min_vllm_version") != new_min:
|
||||
entry["min_vllm_version"] = new_min
|
||||
changed = True
|
||||
if not entry.get("vllm_recipe"):
|
||||
entry["vllm_recipe"] = True
|
||||
changed = True
|
||||
# Hardware support map — useful for filtering "which models run on my AMD box".
|
||||
hw = meta.get("hardware") or {}
|
||||
if hw and entry.get("recipe_hardware") != hw:
|
||||
entry["recipe_hardware"] = {k: str(v) for k, v in hw.items()}
|
||||
changed = True
|
||||
# Tool/reasoning parser hints — purely informational at catalog level;
|
||||
# the live launch command builder still reads them from the recipe API.
|
||||
if features.get("reasoning") and not entry.get("has_reasoning_parser"):
|
||||
entry["has_reasoning_parser"] = True
|
||||
changed = True
|
||||
if features.get("tool_calling") and not entry.get("has_tool_call_parser"):
|
||||
entry["has_tool_call_parser"] = True
|
||||
changed = True
|
||||
return changed
|
||||
|
||||
|
||||
def _build_new_entry(repo: str, recipe: dict, hf_info=None) -> dict | None:
    """Build a fresh catalog entry from a recipe + (optional) HF model info.

    Args:
        repo: Repository id; the text before the first "/" becomes the provider.
        recipe: Parsed recipe mapping; the "model", "meta", "features" and
            "variants" sections are read, each defaulting to empty.
        hf_info: Optional object whose ``created_at``, ``downloads`` and
            ``likes`` attributes are read when present.

    Returns:
        The new entry with None, empty-string and empty-container values
        removed.
    """
    # Local import: only needed for the timezone-aware "today" fallback below.
    from datetime import timezone

    model = recipe.get("model") or {}
    meta = recipe.get("meta") or {}
    features = recipe.get("features") or {}
    variants = recipe.get("variants") or {}
    hardware = meta.get("hardware") or {}

    org = repo.split("/", 1)[0]
    raw_params = _parse_param_count(model.get("parameter_count"))
    active_raw = _parse_param_count(model.get("active_parameters"))
    ctx = model.get("context_length") or 0

    # Pick the smallest-VRAM variant as the catalog quant — that's what most
    # users land on first. NVFP4/MXFP4 typically win this on Blackwell;
    # FP8 elsewhere; BF16 baseline only.
    pick_quant = None
    pick_vram = None
    for variant in variants.values():
        if not isinstance(variant, dict):
            continue
        prec = (variant.get("precision") or "").lower()
        vram = variant.get("vram_minimum_gb") or 0
        quant = _PRECISION_TO_QUANT.get(prec)
        # Until some variant states a VRAM figure, each recognised precision
        # replaces the previous pick; afterwards only a strictly smaller
        # stated figure does.
        if quant and (pick_vram is None or (vram and vram < pick_vram)):
            pick_quant = quant
            pick_vram = vram or pick_vram
    if not pick_quant:
        pick_quant = "BF16"

    arch = (model.get("architecture") or "").lower()
    use_case = _ARCH_USE_CASE.get(arch, "General-purpose chat")
    caps = _capabilities_for(arch, hardware, ctx, bool(features.get("reasoning")))

    rel_date = ""
    downloads = 0
    likes = 0
    if hf_info is not None:
        created = getattr(hf_info, "created_at", None)
        if created:
            rel_date = created.strftime("%Y-%m-%d")
        downloads = int(getattr(hf_info, "downloads", 0) or 0)
        likes = int(getattr(hf_info, "likes", 0) or 0)
    if not rel_date:
        # datetime.utcnow() is deprecated; an aware UTC "now" formats to the
        # same calendar date.
        rel_date = str(
            meta.get("date_updated")
            or datetime.now(timezone.utc).strftime("%Y-%m-%d")
        )

    entry: dict = {
        "name": repo,
        "provider": org,
        "parameter_count": str(model.get("parameter_count") or "?"),
        "parameters_raw": raw_params,
        "is_moe": "moe" in arch,
        "quantization": pick_quant,
        "context_length": int(ctx or 0),
        "use_case": use_case,
        "capabilities": caps,
        "pipeline_tag": "text-generation",
        "architecture": arch or "unknown",
        "hf_downloads": downloads,
        "hf_likes": likes,
        "release_date": rel_date,
        # Recipe-derived bits.
        "vllm_recipe": True,
        # str(): YAML parses an unquoted version such as 0.11 as a float.
        "min_vllm_version": str(model.get("min_vllm_version") or "").strip() or None,
        "recipe_hardware": {k: str(v) for k, v in hardware.items()},
        "has_reasoning_parser": bool(features.get("reasoning")),
        "has_tool_call_parser": bool(features.get("tool_calling")),
    }
    if active_raw:
        entry["active_parameters"] = active_raw
    if pick_vram:
        # The recipe's stated minimum for the chosen variant is stored as-is;
        # the two RAM figures are derived from it by fixed ratios.
        entry["min_vram_gb"] = float(pick_vram)
        entry["min_ram_gb"] = float(round(pick_vram * 0.6, 1))
        entry["recommended_ram_gb"] = float(round(pick_vram * 1.2, 1))
    # Drop empty / None fields to keep the JSON tidy.
    return {k: v for k, v in entry.items() if v not in (None, "", [], {})}
|
||||
|
||||
|
||||
def main():
    """Sync the local catalog with the upstream recipe manifest.

    Parses the CLI flags, loads the catalog JSON, fetches the manifest, then
    stamps existing rows and/or appends new rows, one recipe fetch per target.
    Unless --dry-run is given, a changed catalog is written to a temp file and
    moved over the original. Exits with status 2 when the manifest fetch fails.
    """
    p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    p.add_argument("--update-existing", action="store_true", help="Stamp min_vllm_version + vllm_recipe on existing rows.")
    p.add_argument("--add-missing", action="store_true", help="Add new rows for recipe models not in the catalog.")
    p.add_argument("--limit", type=int, default=0, help="Stop after N recipe fetches.")
    p.add_argument("--dry-run", action="store_true", help="Don't write back; just report.")
    p.add_argument("--sleep", type=float, default=0.05, help="Seconds between HTTP requests.")
    args = p.parse_args()
    # Neither flag given means "do both".
    if not args.update_existing and not args.add_missing:
        args.update_existing = args.add_missing = True

    with CATALOG_PATH.open(encoding="utf-8") as f:
        catalog = json.load(f)
    by_name = {m.get("name"): m for m in catalog if m.get("name")}

    updated = added = skipped = 0

    # Scope the HTTP client so its connections are released on every exit
    # path, including the sys.exit() below.
    with httpx.Client(follow_redirects=True) as client:
        print(f"Catalog: {CATALOG_PATH} ({len(catalog)} entries)")
        print("Fetching upstream manifest…")
        try:
            manifest = _fetch_manifest(client)
        except Exception as e:
            print(f"FATAL: manifest fetch failed: {e}", file=sys.stderr)
            sys.exit(2)
        print(f"Manifest: {len(manifest)} recipes")

        existing = sorted(by_name.keys() & manifest)
        missing = sorted(manifest - by_name.keys())
        print(f"Match catalog ↔ manifest: existing={len(existing)} missing={len(missing)}")

        targets: list[tuple[str, str]] = []  # (repo, action)
        if args.update_existing:
            targets.extend((r, "update") for r in existing)
        if args.add_missing:
            targets.extend((r, "add") for r in missing)
        if args.limit:
            targets = targets[: args.limit]
        total = len(targets)
        print(f"Targets: {total}")

        hf_api = HfApi(token=os.environ.get("HF_TOKEN") or None) if HfApi else None
        started = time.time()

        for n, (repo, action) in enumerate(targets, 1):
            tag = f"[{n}/{total}] {repo:55}"
            recipe = _fetch_recipe(client, repo)
            if not recipe:
                print(f"{tag} skip (no recipe fetched)")
                skipped += 1
                time.sleep(args.sleep)
                continue
            if action == "update":
                entry = by_name[repo]
                if _stamp_from_recipe(entry, recipe):
                    updated += 1
                    print(f"{tag} updated")
                else:
                    print(f"{tag} unchanged")
            else:  # add
                hf_info = None
                if hf_api:
                    # HF metadata is optional; on any failure the entry is
                    # still built from the recipe alone.
                    try:
                        hf_info = hf_api.model_info(repo, files_metadata=False)
                    except HfHubHTTPError as e:
                        code = getattr(getattr(e, "response", None), "status_code", "?")
                        print(f" HF {code} for {repo} — building from recipe only", file=sys.stderr)
                    except Exception as e:
                        print(f" HF error for {repo}: {e}", file=sys.stderr)
                new_entry = _build_new_entry(repo, recipe, hf_info)
                if new_entry:
                    catalog.append(new_entry)
                    by_name[repo] = new_entry
                    added += 1
                    print(f"{tag} added ({new_entry.get('parameter_count','?')}, {new_entry.get('quantization','?')})")
                else:
                    skipped += 1
                    print(f"{tag} skip (couldn't build entry)")
            time.sleep(args.sleep)

        elapsed = time.time() - started

    print()
    print(f"Done in {elapsed:.1f}s — added={added}, updated={updated}, skipped={skipped}")

    if args.dry_run:
        print("Dry run — no write.")
        return
    if added or updated:
        # Write to a sibling temp file, then replace the catalog in one step
        # so a crash mid-write cannot leave a truncated file behind.
        tmp = CATALOG_PATH.with_suffix(".json.tmp")
        with tmp.open("w", encoding="utf-8") as f:
            json.dump(catalog, f, indent=1, ensure_ascii=False)
            f.write("\n")
        tmp.replace(CATALOG_PATH)
        print(f"Wrote {CATALOG_PATH} ({len(catalog)} entries)")
    else:
        print("No changes — catalog untouched.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -19,22 +19,32 @@ GPU_BANDWIDTH = {
|
||||
"6950 xt": 576, "6900 xt": 512, "6800 xt": 512, "6800": 512, "6700 xt": 384, "6600 xt": 256, "6600": 224,
|
||||
"mi300x": 5300, "mi300": 5300, "mi250x": 3277, "mi250": 3277, "mi210": 1638, "mi100": 1229,
|
||||
"9070 xt": 624, "9070": 488, "9060 xt": 322, "9060": 322,
|
||||
# Apple Silicon unified-memory bandwidth (GB/s). Keyed off the chip name
|
||||
# reported by sysctl machdep.cpu.brand_string (e.g. "Apple M4 Max"). Listed
|
||||
# before the bare "m_" keys matters less than length-sorting (done below),
|
||||
# which guarantees "m4 max" is tried before "m4".
|
||||
"m1 ultra": 800, "m1 max": 400, "m1 pro": 200, "m1": 68,
|
||||
"m2 ultra": 800, "m2 max": 400, "m2 pro": 200, "m2": 100,
|
||||
"m3 ultra": 800, "m3 max": 300, "m3 pro": 150, "m3": 100,
|
||||
"m4 max": 546, "m4 pro": 273, "m4": 120,
|
||||
"m5 max": 546, "m5 pro": 273, "m5": 150,
|
||||
}
|
||||
|
||||
# Pre-sort keys by length descending for correct substring matching
|
||||
_BW_KEYS_SORTED = sorted(GPU_BANDWIDTH.keys(), key=len, reverse=True)
|
||||
|
||||
# metal: backstop for Apple Silicon chips not in GPU_BANDWIDTH (e.g. a future
|
||||
# M5) — the named chips above take the accurate bandwidth path instead.
|
||||
# Apple Silicon unified-memory bandwidth (GB/s). For chip families with both
|
||||
# binned and full variants under the same "Apple Mx Max" brand string, prefer
|
||||
# GPU core count when hardware detection provides it; otherwise fall back to the
|
||||
# conservative tier so speed estimates do not over-promise.
|
||||
APPLE_BANDWIDTH_FIXED = {
|
||||
"m1 ultra": 800, "m1 max": 400, "m1 pro": 200, "m1": 68,
|
||||
"m2 ultra": 800, "m2 max": 400, "m2 pro": 200, "m2": 100,
|
||||
"m3 ultra": 800, "m3 pro": 150, "m3": 100,
|
||||
"m4 pro": 273, "m4": 120,
|
||||
"m5 pro": 307, "m5": 153,
|
||||
}
|
||||
APPLE_BANDWIDTH_BY_CORES = {
|
||||
"m3 max": {30: 300, 40: 400},
|
||||
"m4 max": {32: 410, 40: 546},
|
||||
"m5 max": {32: 460, 40: 614},
|
||||
}
|
||||
_APPLE_FIXED_KEYS_SORTED = sorted(APPLE_BANDWIDTH_FIXED.keys(), key=len, reverse=True)
|
||||
_APPLE_VARIANT_KEYS_SORTED = sorted(APPLE_BANDWIDTH_BY_CORES.keys(), key=len, reverse=True)
|
||||
|
||||
# metal: backstop for Apple Silicon chips not in the explicit tables above
|
||||
# (e.g. a future M6) — use a conservative generic estimate when unknown.
|
||||
FALLBACK_K = {"cuda": 220, "rocm": 180, "metal": 150, "cpu_x86": 70, "cpu_arm": 90}
|
||||
|
||||
USE_CASE_WEIGHTS = {
|
||||
@@ -60,10 +70,56 @@ CONTEXT_TARGET = {
|
||||
}
|
||||
|
||||
|
||||
def _lookup_bandwidth(gpu_name):
|
||||
def _lookup_apple_bandwidth(system):
    """Return the bandwidth figure for an Apple GPU described by ``system``.

    Reads ``gpu_name`` and, optionally, ``gpu_cores`` from the mapping. Chips
    listed in APPLE_BANDWIDTH_BY_CORES resolve by core count, falling back to
    that chip's lowest figure when the count is missing or unlisted; other
    chips resolve through APPLE_BANDWIDTH_FIXED. Returns None when the name is
    not a non-empty string, does not contain "apple", or matches no key.
    """
    name = system.get("gpu_name")
    if not (isinstance(name, str) and name):
        return None
    lowered = name.lower()

    # Guard against false matches on non-Apple GPUs whose names contain
    # "m3"/"m4"/"m5" (e.g. NVIDIA Quadro M4000).
    if "apple" not in lowered:
        return None

    cores = system.get("gpu_cores")
    if cores is not None:
        try:
            cores = int(cores)
        except (TypeError, ValueError):
            cores = None

    # Core-count-dependent chips are checked first; the first key found in
    # the name decides.
    variant = next((k for k in _APPLE_VARIANT_KEYS_SORTED if k in lowered), None)
    if variant is not None:
        tiers = APPLE_BANDWIDTH_BY_CORES[variant]
        return tiers[cores] if cores in tiers else min(tiers.values())

    fixed = next((k for k in _APPLE_FIXED_KEYS_SORTED if k in lowered), None)
    return None if fixed is None else APPLE_BANDWIDTH_FIXED[fixed]
|
||||
|
||||
|
||||
def _lookup_bandwidth(system):
|
||||
if isinstance(system, dict):
|
||||
gpu_name = system.get("gpu_name")
|
||||
else:
|
||||
gpu_name = system
|
||||
|
||||
if not isinstance(gpu_name, str) or not gpu_name:
|
||||
return None
|
||||
|
||||
# Apple tiers live only in the Apple-specific table now (#2564), so route
|
||||
# BOTH dict and bare-string callers through it. A bare string carries no
|
||||
# gpu_cores, so the helper falls back to the conservative (lowest) tier for
|
||||
# that model -- before #2564 the generic table answered string lookups, and
|
||||
# dropping that made _lookup_bandwidth("Apple M3 Max") return None.
|
||||
apple_input = system if isinstance(system, dict) else {"gpu_name": gpu_name}
|
||||
bw = _lookup_apple_bandwidth(apple_input)
|
||||
if bw is not None:
|
||||
return bw
|
||||
|
||||
gn = gpu_name.lower()
|
||||
for key in _BW_KEYS_SORTED:
|
||||
if key in gn:
|
||||
return GPU_BANDWIDTH[key]
|
||||
@@ -84,7 +140,7 @@ def _estimate_speed(model, quant, run_mode, system, offload_frac=0.0):
|
||||
"""
|
||||
pb = _active_params_b(model)
|
||||
is_moe = model.get("is_moe", False)
|
||||
bw = _lookup_bandwidth(system.get("gpu_name"))
|
||||
bw = _lookup_bandwidth(system)
|
||||
backend = system.get("backend", "cpu_x86")
|
||||
|
||||
if bw and run_mode in ("gpu", "cpu_offload"):
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import json
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
@@ -335,6 +336,37 @@ def _detect_apple_silicon():
|
||||
if total_gb <= 0:
|
||||
return None
|
||||
|
||||
def _parse_apple_gpu_cores(text):
    """Extract the Apple GPU core count from ``text``, or return None.

    ``text`` is first tried as JSON: the first dict under "SPDisplaysDataType"
    whose "sppci_model" (or "_name") contains "apple" and whose "sppci_cores"
    converts to an int wins. Otherwise the text is searched for a
    "Total Number of Cores: <digits>" line.
    """
    if not text:
        return None

    try:
        payload = json.loads(text)
    except (TypeError, ValueError):  # JSONDecodeError is a ValueError
        payload = None

    displays = []
    if isinstance(payload, dict):
        displays = payload.get("SPDisplaysDataType") or []

    for display in displays:
        if not isinstance(display, dict):
            continue
        label = str(display.get("sppci_model") or display.get("_name") or "")
        if "apple" in label.lower():
            try:
                return int(str(display.get("sppci_cores")).strip())
            except (TypeError, ValueError):
                # Unusable core field on this entry; keep looking.
                pass

    # Plain-text fallback.
    found = re.search(r"Total Number of Cores:\s*(\d+)", text)
    if found is None:
        return None
    try:
        return int(found.group(1))
    except ValueError:
        return None
|
||||
|
||||
gpu_cores = _parse_apple_gpu_cores(_run(["system_profiler", "SPDisplaysDataType", "-json"]))
|
||||
if gpu_cores is None:
|
||||
gpu_cores = _parse_apple_gpu_cores(_run(["system_profiler", "SPDisplaysDataType"]))
|
||||
|
||||
# Usable GPU budget. macOS lets Metal use most of unified memory, but the
|
||||
# default working-set limit scales with RAM: small machines have to keep
|
||||
# more back for the OS + app. These fractions track Apple's
|
||||
@@ -357,7 +389,7 @@ def _detect_apple_silicon():
|
||||
pass
|
||||
|
||||
gpu = {"index": 0, "name": brand, "vram_gb": vram_gb}
|
||||
return {
|
||||
info = {
|
||||
"gpu_name": brand,
|
||||
"gpu_vram_gb": vram_gb,
|
||||
"gpu_count": 1,
|
||||
@@ -369,6 +401,9 @@ def _detect_apple_silicon():
|
||||
# separate pool — downstream fit logic uses this to avoid double-budgeting.
|
||||
"unified_memory": True,
|
||||
}
|
||||
if gpu_cores is not None:
|
||||
info["gpu_cores"] = gpu_cores
|
||||
return info
|
||||
|
||||
|
||||
def _read_file(path):
|
||||
@@ -611,6 +646,93 @@ def _cache_key(host: str, ssh_port: str, platform_name: str):
|
||||
)
|
||||
|
||||
|
||||
def _is_containerized():
    """Best-effort check for whether the local Odysseus process is running in a container.

    Returns False whenever a remote host is being probed. Otherwise returns
    True if /.dockerenv exists or /proc/1/cgroup mentions docker, containerd
    or kubepods. Any error reading the cgroup file counts as "not a container".
    """
    if _remote_host:
        return False
    if os.path.exists("/.dockerenv"):
        return True

    markers = ("docker", "containerd", "kubepods")
    try:
        with open("/proc/1/cgroup", encoding="utf-8", errors="replace") as fh:
            cgroup = fh.read().lower()
    except Exception:
        return False
    return any(marker in cgroup for marker in markers)
|
||||
|
||||
|
||||
def _hardware_visibility_warning(result):
    """Return a non-blocking UX warning when detected hardware may only be container-visible.

    Returns None unless ``result`` is a dict flagged "containerized" with
    neither "manual_hardware" nor "gpu_error" set. A result without a GPU
    yields the "container_no_gpu_visible" warning; one with a GPU and a
    non-zero "total_ram_gb" of at most 8 yields the
    "container_low_ram_visible" notice.
    """
    if not isinstance(result, dict):
        return None
    if (
        result.get("manual_hardware")
        or not result.get("containerized")
        or result.get("gpu_error")
    ):
        return None

    if not result.get("has_gpu"):
        code = "container_no_gpu_visible"
        severity = "warning"
        title = "No GPU visible inside Docker"
        message = (
            "Cookbook is scanning hardware from inside the Odysseus container. "
            "If your host has a GPU, Docker may not be exposing it to the container, "
            "so model recommendations may be CPU-only or too conservative."
        )
    else:
        ram_gb = result.get("total_ram_gb") or 0
        if not (ram_gb and ram_gb <= 8):
            return None
        code = "container_low_ram_visible"
        severity = "info"
        title = "Container-visible RAM may be lower than host RAM"
        message = (
            "Cookbook is seeing the RAM available inside the container. "
            "If your host has more memory, validate host RAM separately or use Manual Hardware."
        )

    return {
        "code": code,
        "severity": severity,
        "title": title,
        "message": message,
        # A fresh list per call, so callers may mutate it freely.
        "actions": ["manual_hardware", "rescan", "copy_diagnostics"],
    }
|
||||
|
||||
|
||||
def _attach_probe_context(result, host=""):
    """Attach probe-scope metadata and optional hardware visibility warning.

    Non-dict results and results carrying an "error" are returned untouched.
    Otherwise ``result`` is updated in place with "probe_scope"
    ("remote", "container" or "native") and "containerized", and the
    "hardware_visibility_warning" key is set or removed to match the current
    warning. The same dict is returned.
    """
    if not isinstance(result, dict) or result.get("error"):
        return result

    if host:
        # A remote probe is never treated as containerized.
        scope, in_container = "remote", False
    else:
        in_container = _is_containerized()
        scope = "container" if in_container else "native"

    result["probe_scope"] = scope
    result["containerized"] = in_container

    note = _hardware_visibility_warning(result)
    if note:
        result["hardware_visibility_warning"] = note
    else:
        # Clear any warning left over from an earlier pass over this dict.
        result.pop("hardware_visibility_warning", None)
    return result
|
||||
|
||||
|
||||
def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
"""Detect system hardware: RAM, CPU, GPU. Cached per host (hardware rarely
|
||||
changes, and probing a remote host over SSH is slow). Pass fresh=True to
|
||||
@@ -635,6 +757,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
if _remote_platform == "windows" and _remote_host:
|
||||
result = _detect_windows()
|
||||
if result:
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_remote_host = None
|
||||
_remote_platform = None
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
@@ -653,6 +776,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
if not _remote_host and os.name == "nt":
|
||||
result = _detect_windows()
|
||||
if result:
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
return result
|
||||
# PowerShell probe failed entirely — fall through to the generic path
|
||||
@@ -683,6 +807,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
"gpu_name": gpu_info["gpu_name"],
|
||||
"gpu_vram_gb": gpu_info["gpu_vram_gb"],
|
||||
"gpu_count": gpu_info["gpu_count"],
|
||||
"gpu_cores": gpu_info.get("gpu_cores"),
|
||||
"gpus": gpu_info.get("gpus", []),
|
||||
"gpu_groups": gpu_info.get("gpu_groups", []),
|
||||
"homogeneous": gpu_info.get("homogeneous", True),
|
||||
@@ -714,6 +839,7 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
|
||||
"gpu_error": _last_gpu_error,
|
||||
}
|
||||
|
||||
result = _attach_probe_context(result, host=host)
|
||||
_remote_host = None
|
||||
_remote_platform = None
|
||||
_cache_by_host[cache_key] = (now, result)
|
||||
|
||||
@@ -188,12 +188,18 @@ def compute_serve_profiles(system, model, serve_weights_gb=None, serve_quant=Non
|
||||
# Shrink context if even the chosen KV won't fit alongside weights.
|
||||
# Start from the smaller of the profile's target and the model's limit.
|
||||
cur_ctx = min(ctx, model_ctx_max)
|
||||
while cur_ctx >= 8192:
|
||||
# Floor the context-shrink loop at 8192, but never above the model's own
|
||||
# trained limit. A model with a sub-8192 context (e.g. a 2048-token
|
||||
# SmolLM) starts below 8192, so a hard-coded 8192 guard skipped the loop
|
||||
# entirely and produced NO profile — the serve UI then fell back to
|
||||
# manual flags even though the model fits the GPU trivially.
|
||||
ctx_floor = min(8192, model_ctx_max)
|
||||
while cur_ctx >= ctx_floor:
|
||||
kv = _kv_gb(model, cur_ctx, kv_type)
|
||||
n_cpu_moe, fits = _cpu_moe_for_budget(model, quant, kv, budget, fixed_gb=serve_weights_gb)
|
||||
est = _weights_gb(model, quant, serve_weights_gb) + kv + 0.6
|
||||
# If a non-MoE model can't fit even fully offloaded, try less context.
|
||||
if model.get("is_moe") or fits or cur_ctx <= 8192:
|
||||
if model.get("is_moe") or fits or cur_ctx <= ctx_floor:
|
||||
profiles.append({
|
||||
"key": key,
|
||||
"label": label,
|
||||
|
||||
@@ -66,41 +66,57 @@ def _has_duplicate_title(skills, title: str) -> bool:
|
||||
def _extract_json_object(text: str) -> Optional[dict]:
|
||||
"""Best-effort extraction of a JSON object from an LLM response.
|
||||
|
||||
The response may be wrapped in code fences or surrounded by prose, and some
|
||||
models emit a stray brace in the prose before the real object
|
||||
(e.g. "uses {placeholder} then {...}"). Slicing first-'{' .. last-'}' then
|
||||
grabs an unparseable span and the skill is silently lost. Try the whole
|
||||
string first, then each '{' start position in turn, returning the first
|
||||
candidate that parses to a JSON object (dict). Returns None if none do.
|
||||
The response may be wrapped in code fences or surrounded by prose. Uses
|
||||
json.JSONDecoder().raw_decode() to locate the boundaries of complete JSON
|
||||
objects starting at each '{' position. Nested objects are filtered out to
|
||||
keep only top-level candidates. If multiple non-overlapping valid JSON
|
||||
objects are found, it is treated as ambiguous and returns None. Otherwise,
|
||||
returns the single valid candidate dictionary.
|
||||
"""
|
||||
if not text:
|
||||
return None
|
||||
s = text.strip()
|
||||
if s.startswith("```"):
|
||||
s = s.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||
end = s.rfind("}")
|
||||
if end == -1:
|
||||
|
||||
decoder = json.JSONDecoder()
|
||||
candidates = []
|
||||
|
||||
start = s.find("{")
|
||||
while start != -1:
|
||||
try:
|
||||
obj, idx = decoder.raw_decode(s[start:])
|
||||
end_pos = start + idx
|
||||
if isinstance(obj, dict):
|
||||
candidates.append((start, end_pos, obj))
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
start = s.find("{", start + 1)
|
||||
|
||||
# Filter out nested candidates to identify top-level dictionaries
|
||||
top_level = []
|
||||
for c in candidates:
|
||||
is_nested = False
|
||||
for other in candidates:
|
||||
if other == c:
|
||||
continue
|
||||
if other[0] <= c[0] and c[1] <= other[1]:
|
||||
is_nested = True
|
||||
break
|
||||
if not is_nested:
|
||||
top_level.append(c)
|
||||
|
||||
if not top_level:
|
||||
return None
|
||||
|
||||
def _as_dict(candidate):
|
||||
try:
|
||||
obj = json.loads(candidate)
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
return None
|
||||
return obj if isinstance(obj, dict) else None
|
||||
if len(top_level) > 1:
|
||||
logger.debug(
|
||||
"[skill-extract] Found multiple non-overlapping JSON objects: %s",
|
||||
[item[2].get("title") for item in top_level]
|
||||
)
|
||||
return None
|
||||
|
||||
# The clean, common case: the whole (de-fenced) string is the object.
|
||||
obj = _as_dict(s)
|
||||
if obj is not None:
|
||||
return obj
|
||||
# Otherwise scan each '{' candidate up to the last '}'.
|
||||
start = s.find("{")
|
||||
while 0 <= start < end:
|
||||
obj = _as_dict(s[start : end + 1])
|
||||
if obj is not None:
|
||||
return obj
|
||||
start = s.find("{", start + 1)
|
||||
return None
|
||||
return top_level[0][2]
|
||||
|
||||
|
||||
async def maybe_extract_skill(
|
||||
|
||||
@@ -603,7 +603,6 @@ class SkillsManager:
|
||||
escalation) — those are work-in-progress and pollute the
|
||||
prompt with half-finished procedures.
|
||||
"""
|
||||
active_toolsets = active_toolsets or []
|
||||
out = []
|
||||
for s in self.load(owner=owner):
|
||||
status = s.get("status")
|
||||
@@ -617,13 +616,16 @@ class SkillsManager:
|
||||
# Platform gating
|
||||
if platform and s.get("platforms") and platform not in s["platforms"]:
|
||||
continue
|
||||
# requires_toolsets: hide unless every required toolset is active
|
||||
# requires_toolsets: hide unless every required toolset is active.
|
||||
# active_toolsets=None means the caller doesn't know the active
|
||||
# set (API listings, chat preface) — don't gate in that case;
|
||||
# only an explicit list filters.
|
||||
req = s.get("requires_toolsets") or []
|
||||
if req and not all(t in active_toolsets for t in req):
|
||||
if req and active_toolsets is not None and not all(t in active_toolsets for t in req):
|
||||
continue
|
||||
# fallback_for_toolsets: hide when any of those toolsets is active
|
||||
fb = s.get("fallback_for_toolsets") or []
|
||||
if fb and any(t in active_toolsets for t in fb):
|
||||
if fb and active_toolsets and any(t in active_toolsets for t in fb):
|
||||
continue
|
||||
out.append({
|
||||
"name": s["name"],
|
||||
|
||||
@@ -64,20 +64,40 @@ def is_youtube_url(url: str) -> bool:
|
||||
return "youtube.com" in url or "youtu.be" in url
|
||||
|
||||
|
||||
# youtube.com-shaped hosts. music.youtube.com serves the same /watch and
|
||||
# /shorts paths, so links shared from YouTube Music must resolve too.
|
||||
_YT_HOSTS = ("www.youtube.com", "youtube.com", "m.youtube.com", "music.youtube.com")
|
||||
# Path prefixes whose first following segment is the video id. Covers the
|
||||
# /embed/ player, Shorts (/shorts/), live streams (/live/), and the legacy
|
||||
# /v/ embed — all of which `is_youtube_url` already treats as YouTube, so
|
||||
# they must be extractable or the link is silently dropped (neither web-fetched
|
||||
# nor transcript-fetched) by the chat pipeline.
|
||||
_YT_PATH_PREFIXES = ("/embed/", "/shorts/", "/live/", "/v/")
|
||||
|
||||
|
||||
def extract_youtube_id(url: str) -> Optional[str]:
|
||||
"""Extract YouTube video ID from various URL formats."""
|
||||
"""Extract a YouTube video ID from the common URL shapes:
|
||||
watch?v=, youtu.be/<id>, /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>,
|
||||
across youtube.com / m.youtube.com / music.youtube.com / youtu.be."""
|
||||
if not isinstance(url, str):
|
||||
return None
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
if parsed.hostname in ("www.youtube.com", "youtube.com", "m.youtube.com"):
|
||||
host = (parsed.hostname or "").lower()
|
||||
if host in _YT_HOSTS:
|
||||
if parsed.path == "/watch":
|
||||
params = urllib.parse.parse_qs(parsed.query)
|
||||
if "v" in params:
|
||||
if params.get("v"):
|
||||
return params["v"][0]
|
||||
elif parsed.path.startswith("/embed/"):
|
||||
return parsed.path.split("/")[-1]
|
||||
elif parsed.hostname == "youtu.be":
|
||||
return parsed.path[1:]
|
||||
else:
|
||||
for prefix in _YT_PATH_PREFIXES:
|
||||
if parsed.path.startswith(prefix):
|
||||
vid = parsed.path[len(prefix):].split("/")[0]
|
||||
if vid:
|
||||
return vid
|
||||
elif host == "youtu.be":
|
||||
vid = parsed.path.lstrip("/").split("/")[0]
|
||||
if vid:
|
||||
return vid
|
||||
return None
|
||||
|
||||
|
||||
@@ -170,6 +190,8 @@ def format_transcript_for_context(
|
||||
if segments:
|
||||
ctx += "Timestamped Transcript:\n"
|
||||
for seg in segments:
|
||||
if not isinstance(seg, dict):
|
||||
continue
|
||||
ctx += f"[{seg['timestamp']}] {seg['text']}\n"
|
||||
# Check length — fall back to plain text if too long
|
||||
if len(ctx) > 12000:
|
||||
@@ -202,15 +224,24 @@ async def fetch_youtube_comments(
|
||||
f"https://www.youtube.com/watch?v={video_id}",
|
||||
]
|
||||
|
||||
proc = await asyncio.wait_for(
|
||||
asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
),
|
||||
timeout=timeout,
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
# Bound the wait on the process actually finishing, not on spawning it.
|
||||
# create_subprocess_exec returns as soon as the child starts, so wrapping
|
||||
# it in wait_for never enforces the timeout — proc.communicate() is the
|
||||
# blocking step. Kill and reap the child if it overruns so it does not
|
||||
# linger after we return.
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
raise
|
||||
|
||||
if proc.returncode != 0:
|
||||
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
|
||||
|
||||
@@ -91,6 +91,9 @@ _ROUTING_PATTERNS: tuple[tuple[str, str, Pattern[str]], ...] = tuple(
|
||||
("ui", "tool or feature toggle request", r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b"),
|
||||
|
||||
# Deep research jobs, not quick conceptual mentions of research.
|
||||
("web", "explicit web search request", rf"{_PLEASE}(?:do|run|use|perform|make)\s+(?:a\s+)?(?:web\s+search|search\s+the\s+web)\b.+"),
|
||||
("web", "web lookup imperative request", rf"{_PLEASE}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
|
||||
("web", "assistant web lookup request", rf"{_ACTION_QUESTION}(?:web\s+search|search\s+the\s+web|search\s+online|look\s+up|google)\b.+"),
|
||||
("research", "deep research imperative request", rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+"),
|
||||
("research", "assistant deep research request", rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+"),
|
||||
|
||||
|
||||
@@ -38,6 +38,167 @@ from src.agent_tools import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Redaction patterns for common secret-bearing shapes. Explicit and tested
|
||||
# (see tests/test_loop_guard_signals.py) rather than one clever broad regex —
|
||||
# safety first, but we try not to mangle harmless prose. Applied in order.
|
||||
_REDACTED = "[redacted]"
|
||||
|
||||
# Cookie: ... / Set-Cookie: ... — redact the rest of the line (cookies hold spaces).
|
||||
_SENSITIVE_COOKIE_RE = re.compile(
|
||||
r"(?i)\b((?:set-)?cookie\s*[:=]\s*)[^\r\n]+"
|
||||
)
|
||||
# URL credentials, e.g. postgres://user:pass@host/db. The password half allows
|
||||
# inner colons (postgres://user:pa:ss@host/db) but still stops at / and @.
|
||||
_SENSITIVE_URL_CRED_RE = re.compile(
|
||||
r"(?i)\b([a-z][a-z0-9+.\-]*://)[^\s:/@]+:[^\s/@]+@"
|
||||
)
|
||||
# Prefix-only discovery regexes. Each matches the key and its separator (the part
|
||||
# we KEEP); the value that follows is found by a linear scanner rather than by a
|
||||
# regex, so there is no backtracking-prone quantifier over uncontrolled input.
|
||||
#
|
||||
# Authorization: Bearer <tok> / Authorization: Basic "two word secret"
|
||||
_AUTH_PREFIX_RE = re.compile(
|
||||
r"(?i)authorization\s*[:=]\s*(?:bearer|basic)\s+"
|
||||
)
|
||||
# Provider-prefixed env names, e.g. OPENAI_API_KEY=..., AWS_SECRET_ACCESS_KEY=...,
|
||||
# GITHUB_TOKEN=... — require a sensitive suffix preceded by `_` so benign names
|
||||
# that merely end in KEY (MONKEY, TURKEY) are left alone.
|
||||
_ENV_PREFIX_RE = re.compile(
|
||||
r"(?:export\s+)?\b[A-Z][A-Z0-9_]*"
|
||||
r"_(?:KEY|TOKEN|SECRET|PASSWORD|PASSWD|PWD|CREDENTIALS?)\s*=\s*"
|
||||
)
|
||||
# Generic sensitive key, e.g. password=..., api_key: ..., client_secret=...
|
||||
_KEY_PREFIX_RE = re.compile(
|
||||
r"(?i)\b(?:password|passwd|pwd|token|api[_-]?key|client_secret|secret)\b\s*[:=]\s*"
|
||||
)
|
||||
# Obvious provider-shaped bare tokens (no surrounding key needed).
|
||||
_SENSITIVE_BARE_TOKEN_RE = re.compile(
|
||||
r"\b("
|
||||
r"sk-[A-Za-z0-9_\-]{16,}" # OpenAI / Anthropic style
|
||||
r"|gh[pousr]_[A-Za-z0-9]{20,}" # GitHub PAT
|
||||
r"|xox[baprs]-[A-Za-z0-9\-]{10,}" # Slack
|
||||
r"|AKIA[0-9A-Z]{16}" # AWS access key id
|
||||
r"|hf_[A-Za-z0-9]{16,}" # Hugging Face token
|
||||
r"|AIza[0-9A-Za-z_\-]{20,}" # Google API key
|
||||
r")\b"
|
||||
)
|
||||
|
||||
|
||||
def _consume_secret_value_end(text: str, start: int) -> int:
|
||||
"""Return the exclusive end index of the secret value beginning at ``start``.
|
||||
|
||||
If the value is quoted, scan to the matching unescaped quote (backslash
|
||||
escapes are skipped two chars at a time). Otherwise scan to the first
|
||||
whitespace, comma, or semicolon. The scan is linear in the length of the
|
||||
input, so it cannot exhibit catastrophic backtracking.
|
||||
"""
|
||||
n = len(text)
|
||||
if start >= n:
|
||||
return start
|
||||
quote = text[start]
|
||||
if quote in ("'", '"'):
|
||||
i = start + 1
|
||||
while i < n:
|
||||
ch = text[i]
|
||||
if ch == "\\":
|
||||
i += 2
|
||||
continue
|
||||
if ch == quote:
|
||||
return i + 1
|
||||
i += 1
|
||||
return n # unterminated quote: redact to the end
|
||||
i = start
|
||||
while i < n and not text[i].isspace() and text[i] not in (",", ";"):
|
||||
i += 1
|
||||
return i
|
||||
|
||||
|
||||
def _redact_after_prefix(text: str, prefix_re: "re.Pattern") -> str:
|
||||
"""Redact the value following each ``prefix_re`` match using a linear scan."""
|
||||
result = []
|
||||
pos = 0
|
||||
n = len(text)
|
||||
while pos < n:
|
||||
match = prefix_re.search(text, pos)
|
||||
if match is None:
|
||||
result.append(text[pos:])
|
||||
break
|
||||
result.append(text[pos:match.end()])
|
||||
value_end = _consume_secret_value_end(text, match.end())
|
||||
if value_end > match.end():
|
||||
result.append(_REDACTED)
|
||||
pos = value_end
|
||||
else:
|
||||
# Empty value: nothing to redact; step past the prefix and continue.
|
||||
pos = match.end()
|
||||
if pos < n:
|
||||
result.append(text[pos])
|
||||
pos += 1
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def _redact_private_keys(text: str) -> str:
|
||||
"""Replace PEM private-key blocks with a placeholder via linear scanning.
|
||||
|
||||
Finds ``-----BEGIN `` markers, verifies the header names a PRIVATE KEY,
|
||||
locates the matching ``-----END `` marker, and collapses the whole block.
|
||||
No regex is used, so the (multi-line, uncontrolled) body cannot trigger
|
||||
polynomial matching.
|
||||
"""
|
||||
begin_marker = "-----BEGIN "
|
||||
end_marker = "-----END "
|
||||
dash = "-----"
|
||||
max_header = 64 # generous bound on "[TYPE ]PRIVATE KEY"
|
||||
result = []
|
||||
pos = 0
|
||||
while True:
|
||||
begin = text.find(begin_marker, pos)
|
||||
if begin == -1:
|
||||
result.append(text[pos:])
|
||||
return "".join(result)
|
||||
header_start = begin + len(begin_marker)
|
||||
header_close = text.find(dash, header_start)
|
||||
if (
|
||||
header_close == -1
|
||||
or header_close - header_start > max_header
|
||||
or not text[header_start:header_close].endswith("PRIVATE KEY")
|
||||
):
|
||||
result.append(text[pos:header_start])
|
||||
pos = header_start
|
||||
continue
|
||||
end = text.find(end_marker, header_close)
|
||||
if end == -1:
|
||||
result.append(text[pos:])
|
||||
return "".join(result)
|
||||
end_header_start = end + len(end_marker)
|
||||
end_close = text.find(dash, end_header_start)
|
||||
if (
|
||||
end_close == -1
|
||||
or end_close - end_header_start > max_header
|
||||
or not text[end_header_start:end_close].endswith("PRIVATE KEY")
|
||||
):
|
||||
result.append(text[pos:header_start])
|
||||
pos = header_start
|
||||
continue
|
||||
result.append(text[pos:begin])
|
||||
result.append("[redacted private key]")
|
||||
pos = end_close + len(dash)
|
||||
|
||||
|
||||
def _redact_sensitive_text(value: object) -> str:
|
||||
"""Redact obvious credential values before surfacing tool output."""
|
||||
if value is None:
|
||||
return ""
|
||||
|
||||
text = str(value)
|
||||
text = _redact_private_keys(text)
|
||||
text = _redact_after_prefix(text, _AUTH_PREFIX_RE)
|
||||
text = _SENSITIVE_COOKIE_RE.sub(r"\1" + _REDACTED, text)
|
||||
text = _SENSITIVE_URL_CRED_RE.sub(r"\1" + _REDACTED + "@", text)
|
||||
text = _redact_after_prefix(text, _ENV_PREFIX_RE)
|
||||
text = _redact_after_prefix(text, _KEY_PREFIX_RE)
|
||||
return _SENSITIVE_BARE_TOKEN_RE.sub(_REDACTED, text)
|
||||
|
||||
|
||||
def _load_mcp_disabled_map() -> Dict[str, set]:
|
||||
"""Load per-server disabled tool sets from the database."""
|
||||
@@ -262,6 +423,11 @@ _DOMAIN_RULES = {
|
||||
- Use `manage_settings` for preferences and tool enable/disable.
|
||||
- Use named tools over `app_api` when a named wrapper exists.
|
||||
- `app_api` is only for safe UI/API actions without a named tool; do not use it for shell, package installs, engine rebuilds, or sensitive auth/admin paths.""",
|
||||
"contacts": """\
|
||||
## Contacts rules
|
||||
- Use `resolve_contact` to look up a contact's email or phone number by name. Searches the CardDAV address book and sent email history.
|
||||
- Use `manage_contact` to list, add, update, or delete contacts in the address book.
|
||||
- Do NOT use `manage_memory` for contact lookups — contact details live in the address book, not memory.""",
|
||||
}
|
||||
|
||||
_DOMAIN_TOOL_MAP = {
|
||||
@@ -274,6 +440,7 @@ _DOMAIN_TOOL_MAP = {
|
||||
"sessions": {"create_session", "list_sessions", "manage_session", "send_to_session", "search_chats"},
|
||||
"files": {"bash", "python", "read_file", "write_file", "edit_file", "grep", "glob", "ls", "get_workspace"},
|
||||
"settings": {"manage_settings", "manage_endpoints", "manage_mcp", "manage_webhooks", "manage_tokens", "app_api"},
|
||||
"contacts": {"resolve_contact", "manage_contact"},
|
||||
}
|
||||
|
||||
def _domain_rules_for_tools(tool_names: set) -> list[str]:
|
||||
@@ -402,7 +569,7 @@ Generate an image. Line 1 = description, line 2 = model name, line 3 = WxH (e.g.
|
||||
"ask_teacher": "- ```ask_teacher``` — Escalate a hard question to a more capable model. Line 1 = model name or 'auto', rest = the question. Use when stuck or need expert knowledge.",
|
||||
"list_models": "- ```list_models``` — Show all available AI models across all endpoints. Use when user asks what models are available.",
|
||||
"manage_session": "- ```manage_session``` — Rename, archive, delete, fork, switch, or `list` chats (the UI calls them 'chats'; 'session' is internal). Line 1 = action (list/switch/rename/archive/unarchive/delete/important/unimportant/truncate/fork), Line 2 = exact chat id from `list_sessions` (or `current` where supported). For delete/archive/truncate, always list first and reuse the exact id; never invent placeholder ids. `switch`/`open` returns a clickable anchor link the user can tap to open the chat — use for \"open my X chat\".",
|
||||
"manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts, identity, preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this', states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories.",
|
||||
"manage_memory": "- ```manage_memory``` — Manage the user's persistent memory (facts about the USER themselves, their preferences, context that persists across chats). Line 1 = action (list/add/edit/delete/search), rest = content. Use when user says 'remember this' about themselves, states identity facts like 'my name is <name>' / 'call me <name>' / 'I live in <place>', or asks about stored memories. DO NOT use for info about another person (their address, phone, email, birthday) — that goes in `manage_contact`. If the user pastes an address/phone with a name and says 'save this for <person>', use `manage_contact add` with the address arg, NOT manage_memory.",
|
||||
"manage_skills": "- ```manage_skills``` — Skill registry (SKILL.md format). Args (JSON): {\"action\": \"list|view|view_ref|search|add|edit|patch|publish|delete\", ...}. `list` returns the index of available skills (published + teacher-escalation drafts); `view name=foo` fetches the full SKILL.md; `view_ref name=foo path=...` loads a reference file under the skill directory. For `add`, provide an explicit kebab-case `name` and only report the exact returned name, because storage may normalize or dedupe it. Use this BEFORE doing domain work — there may already be a procedure (published or draft) that prescribes the correct steps. Drafts written by the teacher loop are authoritative guidance even though they're not yet published.",
|
||||
"manage_tasks": "- ```manage_tasks``` — Create and manage scheduled background tasks (recurring AI jobs). Args (JSON): {\"action\": \"list|create|edit|delete|pause|resume|run\", ...}",
|
||||
"manage_endpoints": "- ```manage_endpoints``` — Add, remove, or configure AI model API endpoints. Args (JSON): {\"action\": \"list|add|delete|enable|disable\", ...}. Use when user wants to add a new AI provider.",
|
||||
@@ -422,7 +589,9 @@ Notes, checklists, AND user reminders. Use this for "create/add/write a note", t
|
||||
```send_email
|
||||
{"to": "recipient@example.com", "subject": "Re: Your question", "body": "Hi, ...", "account": "gmail"}
|
||||
```
|
||||
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.""",
|
||||
Send a new email via SMTP. Use `resolve_contact` first if you only have a name. If multiple email accounts exist, call `list_email_accounts` first and pass the chosen `account`.
|
||||
|
||||
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
|
||||
"list_emails": """\
|
||||
```list_emails
|
||||
{"folder": "INBOX", "max_results": 20, "unread_only": false, "account": "gmail"}
|
||||
@@ -433,7 +602,9 @@ List recent emails from a folder, newest first, including read messages by defau
|
||||
```reply_to_email
|
||||
{"uid": "1234", "body": "Sounds good — talk Friday.", "account": "gmail"}
|
||||
```
|
||||
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).""",
|
||||
SEND a reply email immediately by UID. Do not use this for "open a reply" or "start a reply" — those should use `ui_control` with `open_email_reply <uid> <folder> reply` to open the email draft document. For follow-up requests like "reply ..." after reading/listing email where the user clearly wants to send now, use the exact UID and account from the latest `read_email`/`list_emails` result. Never invent UID `1`. Threads automatically (In-Reply-To/References handled).
|
||||
|
||||
CRITICAL — signatures: DO NOT invent a sign-off name. End the body with just `Thanks,` or similar — never type a person's name unless the user explicitly told you what to sign as. When `agent_email_confirm` is on (default), the tool returns `{pending: true, pending_id: ...}` and stages the email for the user to approve in the chat UI instead of SMTPing immediately.""",
|
||||
"bulk_email": """\
|
||||
```bulk_email
|
||||
{"action": "delete", "uids": ["10997", "10998"], "folder": "INBOX", "account": "Gmail"}
|
||||
@@ -443,7 +614,7 @@ Bulk delete/archive/mark emails. Use this for "delete all those" after listing e
|
||||
"archive_email": "- ```archive_email``` — Archive one email by UID. Args (JSON): {\"uid\":\"...\", \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
|
||||
"mark_email_read": "- ```mark_email_read``` — Mark one email read/unread. Args (JSON): {\"uid\":\"...\", \"read\":true, \"folder\":\"INBOX\", \"account\":\"Gmail\"}. For multiple messages use bulk_email.",
|
||||
"resolve_contact": "- ```resolve_contact``` — Look up a contact's email by name. Searches CardDAV address book + sent email history. Args (JSON): {\"name\": \"...\"}. Use BEFORE send_email when the user gives only a name.",
|
||||
"manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"uid\": \"...\"}. Use only for explicit address-book/contact requests with contact details. Do NOT use for user identity facts like 'my name is <name>'; save those with manage_memory. For update/delete, call action=list first to get the uid.",
|
||||
"manage_contact": "- ```manage_contact``` — Create/update/delete/list CardDAV contacts. Args (JSON): {\"action\": \"list|add|update|delete\", \"name\": \"...\", \"email\": \"...\", \"phones\": [...], \"address\": \"...\", \"uid\": \"...\"}. Use for info about another person: email, phone, postal address. For 'save this for <person>' / address paste / phone next to a name, use this — NOT manage_memory. Do NOT use for user identity facts ('my name is X'); those are manage_memory. For update/delete, call action=list first for the uid.",
|
||||
"manage_calendar": """\
|
||||
```manage_calendar
|
||||
{"action": "create_event", "summary": "<event title>", "dtstart": "<natural language or ISO datetime>"}
|
||||
@@ -600,7 +771,7 @@ _API_HOSTS = frozenset([
|
||||
"api.deepseek.com", "deepseek.com",
|
||||
"api.together.xyz", "api.fireworks.ai",
|
||||
"api.perplexity.ai", "api.x.ai",
|
||||
"ollama.com", "api.venice.ai",
|
||||
"ollama.com", "api.venice.ai", "api.kimi.com",
|
||||
"api.githubcopilot.com",
|
||||
# Local OpenAI-compatible endpoints (llama.cpp, vLLM, LM Studio, etc.).
|
||||
# Without these, `_is_api_model` falls back to keyword sniffing on the
|
||||
@@ -787,6 +958,12 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
|
||||
domains.add("documents")
|
||||
if has(r"\b(search|web|google|look up|latest|news|current|weather|forecast|stock price|price of|website|url|https?://|www\.)\b"):
|
||||
domains.add("web")
|
||||
if has(
|
||||
r"\b(wyszukaj|wyszukać|wyszukac)\b.*\b(internet|internecie|online|web)\b",
|
||||
r"\b(sprawd[zź]|znajd[zź])\b.*\b(internet|internecie|online|web)\b",
|
||||
r"\b(aktualn\w*|bieżąc\w*|biezac\w*|dzisiaj|teraz)\b.*\b(pogod\w*|temperatur\w*)\b",
|
||||
):
|
||||
domains.add("web")
|
||||
if has(r"\b(research|deep dive|investigate|look into)\b"):
|
||||
domains.add("web")
|
||||
if has(r"\b(open|show|toggle|turn on|turn off|disable|enable|switch model|change model|settings|theme|panel)\b"):
|
||||
@@ -797,6 +974,8 @@ def _classify_agent_request(messages: List[Dict], last_user: str) -> Dict[str, o
|
||||
domains.add("files")
|
||||
if has(r"\b(endpoint|api token|mcp|webhook|preference|configure|config|setting)\b"):
|
||||
domains.add("settings")
|
||||
if has(r"\b(contact|contacts|phone|phone number|address book|vcard)\b"):
|
||||
domains.add("contacts")
|
||||
|
||||
low_signal = not continuation and not domains
|
||||
return {
|
||||
@@ -845,6 +1024,7 @@ def _build_system_prompt(
|
||||
compact: bool = False,
|
||||
owner: Optional[str] = None,
|
||||
suppress_local_context: bool = False,
|
||||
active_email: Optional[Dict[str, str]] = None,
|
||||
) -> List[Dict]:
|
||||
"""Build agent system prompt, inject MCP/document context, merge consecutive system msgs."""
|
||||
global _cached_base_prompt, _cached_base_prompt_key
|
||||
@@ -1037,6 +1217,66 @@ def _build_system_prompt(
|
||||
else:
|
||||
set_active_document(None)
|
||||
|
||||
# Active email reader — frontend told us the user has an email open.
|
||||
# Inject a context block so "reply", "summarize this", "what does it say"
|
||||
# resolve to the real UID instead of the agent inventing a fresh .md
|
||||
# draft with fake headers. This is the email equivalent of _doc_message.
|
||||
_email_message = None
|
||||
if active_email and active_email.get("uid"):
|
||||
_em_uid = active_email.get("uid", "")
|
||||
_em_folder = active_email.get("folder", "INBOX")
|
||||
_em_account = active_email.get("account", "")
|
||||
_em_subject = active_email.get("subject", "") or "(no subject)"
|
||||
_em_from = active_email.get("from", "") or "(unknown sender)"
|
||||
_em_preview = (active_email.get("body_preview", "") or "").strip()
|
||||
_preview_block = f"\nBody preview:\n```\n{_em_preview[:1800]}\n```" if _em_preview else ""
|
||||
_acct_arg = f" {_em_account}" if _em_account else ""
|
||||
email_ctx = (
|
||||
f"ACTIVE EMAIL OPEN (the user has this email open in a reader window right now)\n"
|
||||
f"UID: {_em_uid}\n"
|
||||
f"Folder: {_em_folder}\n"
|
||||
f"Account: {_em_account or '(default)'}\n"
|
||||
f"From: {_em_from}\n"
|
||||
f"Subject: {_em_subject}{_preview_block}\n\n"
|
||||
f"CRITICAL DEFAULT — every request about email this turn refers to "
|
||||
f"THIS email unless the user names a DIFFERENT specific recipient "
|
||||
f"(a name, an email address, or another thread). Examples that "
|
||||
f"ALL mean reply-to-the-open-email:\n"
|
||||
f" • 'reply' / 'reply to this' / 'respond'\n"
|
||||
f" • 'write email saying X' / 'send email saying X' / 'draft something'\n"
|
||||
f" • 'tell them X' / 'say hi' / 'thanks' / 'ack' / 'lmk'\n"
|
||||
f" • 'summarize it' / 'what does it say' / 'tldr'\n"
|
||||
f" • 'forward this' / 'forward to <addr>'\n"
|
||||
f"DO NOT ASK THE USER 'who do you want to send this to?' — the "
|
||||
f"answer is ALWAYS the sender of the open email (above) unless they "
|
||||
f"named someone else. Asking that is the wrong move every time.\n\n"
|
||||
f"RULES for the open email:\n"
|
||||
f"1. DRAFT a reply (default for any 'write/send/reply/tell them' "
|
||||
f"request without a different recipient): call `ui_control` with "
|
||||
f"`action=\"open_email_reply\"` and `extra=\"{_em_uid} {_em_folder} "
|
||||
f"reply\"`. This opens the proper reply doc with To/Subject/"
|
||||
f"In-Reply-To pre-filled by the backend. The user will see and edit "
|
||||
f"it before sending. DO NOT `create_document` a markdown file with "
|
||||
f"hand-written `To:` / `Subject:` / `In-Reply-To:` headers — that "
|
||||
f"is wrong every time.\n"
|
||||
f"2. SEND a reply immediately (skip the draft): call "
|
||||
f"`reply_to_email` with the UID above. Only do this when the user "
|
||||
f"explicitly says 'send' / 'send the reply' / 'reply and send'.\n"
|
||||
f"3. READ the full body (the preview above may be truncated): "
|
||||
f"call `read_email` with the UID/folder/account above.\n"
|
||||
f"4. SUMMARIZE / answer questions about it: read it first, then "
|
||||
f"answer in chat. Don't create a document for a summary unless "
|
||||
f"the user explicitly asks for one.\n"
|
||||
f"5. Never ask the user to paste the email or 'share it with you' "
|
||||
f"— you already have its identity above and can read the full body.\n"
|
||||
f"6. The ONLY time you ask 'who to send to?' is when the user "
|
||||
f"explicitly says 'send a NEW email to someone else' or names a "
|
||||
f"recipient you can't identify. A bare 'send email saying X' = the "
|
||||
f"open email's sender.\n"
|
||||
)
|
||||
_email_message = untrusted_context_message("active email reader", email_ctx)
|
||||
_email_message["_protected"] = True
|
||||
|
||||
# Inject writing style for any email writing path. This is deliberately
|
||||
# broader than read/list: models may compose via send_email, reply_to_email,
|
||||
# or ui_control open_email_reply after the first tool round.
|
||||
@@ -1244,6 +1484,9 @@ def _build_system_prompt(
|
||||
if _doc_message:
|
||||
merged.insert(last_user_idx, _doc_message)
|
||||
last_user_idx += 1 # the document message is now at last_user_idx
|
||||
if _email_message:
|
||||
merged.insert(last_user_idx, _email_message)
|
||||
last_user_idx += 1
|
||||
if _skills_message:
|
||||
merged.insert(last_user_idx, _skills_message)
|
||||
last_user_idx += 1
|
||||
@@ -1278,12 +1521,18 @@ def _build_base_prompt(
|
||||
from src.tool_index import ALWAYS_AVAILABLE
|
||||
|
||||
disabled = set(disabled_tools or [])
|
||||
if not get_setting("image_gen_enabled", True):
|
||||
if not get_setting("image_gen_enabled", False):
|
||||
disabled.add("generate_image")
|
||||
|
||||
if relevant_tools is not None:
|
||||
# RAG mode: include always-available + retrieved + admin (if needed)
|
||||
tool_names = set(ALWAYS_AVAILABLE) | set(relevant_tools)
|
||||
# RAG mode: trust the relevant_tools set as already-composed.
|
||||
# get_tools_for_query starts from ALWAYS_AVAILABLE and may
|
||||
# *discard* tools that conflict with the query's intent (e.g.
|
||||
# drop manage_memory for clear contact-save patterns). Unioning
|
||||
# ALWAYS_AVAILABLE back in here used to silently undo those
|
||||
# drops. Only force-include the irreducible loop primitives
|
||||
# (ask_user, update_plan) as belt-and-suspenders.
|
||||
tool_names = set(relevant_tools) | {"ask_user", "update_plan"}
|
||||
if needs_admin:
|
||||
tool_names |= _ADMIN_TOOLS
|
||||
agent_prompt = _assemble_prompt(tool_names, disabled, compact=compact)
|
||||
@@ -1724,6 +1973,7 @@ async def stream_agent_loop(
|
||||
max_tool_calls: int = 0,
|
||||
context_length: int = 0,
|
||||
active_document=None,
|
||||
active_email: Optional[Dict[str, str]] = None,
|
||||
session_id: Optional[str] = None,
|
||||
disabled_tools: Optional[Set[str]] = None,
|
||||
owner: Optional[str] = None,
|
||||
@@ -1801,18 +2051,21 @@ async def stream_agent_loop(
|
||||
logger.info(f"[tool-rag] Using caller-provided relevant_tools ({len(_relevant_tools)} tools)")
|
||||
if not guide_only and not _relevant_tools and bool(_intent.get("low_signal")):
|
||||
from src.tool_index import ALWAYS_AVAILABLE
|
||||
_relevant_tools = set(ALWAYS_AVAILABLE)
|
||||
if workspace:
|
||||
# An active workspace IS the file-work signal: a vague "look at the
|
||||
# project" means explore this folder. Surface only the READ-ONLY file
|
||||
# tools (intersection with the plan-mode read-only allowlist) so the
|
||||
# agent can investigate; write/shell tools stay out until the request
|
||||
# actually calls for them (RAG retrieval adds those on a real ask).
|
||||
_relevant_tools = set(ALWAYS_AVAILABLE)
|
||||
from src.tool_security import PLAN_MODE_READONLY_TOOLS
|
||||
_relevant_tools |= (_DOMAIN_TOOL_MAP["files"] & PLAN_MODE_READONLY_TOOLS)
|
||||
logger.info("[tool-rag] Low-signal but workspace active; including read-only file tools")
|
||||
else:
|
||||
logger.info("[tool-rag] Low-signal agent message; skipping retrieval and using always-available tools only")
|
||||
# Don't short-circuit: fall through to RAG retrieval below.
|
||||
# Non-English queries are flagged low_signal by the English-only
|
||||
# intent classifier, but fastembed retrieval works across languages.
|
||||
logger.info("[tool-rag] Low-signal query; will run RAG retrieval")
|
||||
if not guide_only and not _relevant_tools:
|
||||
try:
|
||||
from src.tool_index import get_tool_index, ALWAYS_AVAILABLE
|
||||
@@ -1887,6 +2140,44 @@ async def stream_agent_loop(
|
||||
if _relevant_tools is not None and active_document is not None:
|
||||
_relevant_tools.update({"edit_document", "update_document", "suggest_document"})
|
||||
|
||||
# The skill index injected by _build_system_prompt tells the model to
|
||||
# call `manage_skills action=view`, and Jaccard-matched skills are pasted
|
||||
# into the prompt as procedures to follow — but neither path goes through
|
||||
# tool selection, so the model can be handed a procedure naming tools
|
||||
# (grep, read_file, ...) that aren't in its schema list. Keep the schemas
|
||||
# in lockstep: manage_skills is callable whenever any skill is indexed,
|
||||
# and a matched skill's declared requires_toolsets ride along with it.
|
||||
if not guide_only and _relevant_tools is not None:
|
||||
try:
|
||||
from services.memory.skills import SkillsManager
|
||||
from src.constants import DATA_DIR
|
||||
_skills_on = True
|
||||
try:
|
||||
from routes.prefs_routes import _load_for_user as _load_prefs
|
||||
_skills_on = (_load_prefs(owner) or {}).get("skills_enabled", True)
|
||||
except Exception:
|
||||
pass
|
||||
_sm = SkillsManager(DATA_DIR)
|
||||
_owner_skills = _sm.load(owner=owner) if _skills_on else []
|
||||
if _owner_skills:
|
||||
_relevant_tools.add("manage_skills")
|
||||
if _retrieval_query:
|
||||
# Validate against every known executable tool, not just
|
||||
# TOOL_SECTIONS — code-nav tools (grep/glob/ls) ship as
|
||||
# schemas without a prompt-prose section.
|
||||
from src.tool_policy import known_tool_names
|
||||
_known = known_tool_names()
|
||||
for _sk in _sm.get_relevant_skills(
|
||||
_retrieval_query, skills=_owner_skills,
|
||||
threshold=0.25, max_items=3,
|
||||
):
|
||||
_relevant_tools.update(
|
||||
t for t in (_sk.get("requires_toolsets") or [])
|
||||
if t in _known
|
||||
)
|
||||
except Exception as _e:
|
||||
logger.debug(f"[tool-rag] skill-aware tool include skipped: {_e}")
|
||||
|
||||
if _relevant_tools is not None:
|
||||
logger.info("[agent-intent] selected_tools=%s", sorted(_relevant_tools)[:50])
|
||||
|
||||
@@ -1937,6 +2228,10 @@ async def stream_agent_loop(
|
||||
# and can override this list for users who know their setup.
|
||||
_model_no_tools = any(kw in _model_lc for kw in (
|
||||
"deepseek-r1",
|
||||
# Open-weight GPT-OSS models are commonly served through llama.cpp /
|
||||
# llama-cpp-python. Their names contain "gpt-o", but they do not use
|
||||
# OpenAI's native tool-call channel unless the endpoint opts in.
|
||||
"gpt-oss",
|
||||
))
|
||||
# Native Ollama endpoints (/api/chat) handle tool schemas differently from
|
||||
# the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to
|
||||
@@ -1966,6 +2261,7 @@ async def stream_agent_loop(
|
||||
compact=_is_api_model,
|
||||
owner=owner,
|
||||
suppress_local_context=guide_only,
|
||||
active_email=active_email,
|
||||
)
|
||||
if plan_mode and not guide_only:
|
||||
# Steer the model to investigate-then-propose. Hard tool gating handles
|
||||
@@ -1998,30 +2294,34 @@ async def stream_agent_loop(
|
||||
_t3 = time.time()
|
||||
try:
|
||||
from src.context_compactor import trim_for_context
|
||||
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX
|
||||
from src.settings import is_setting_overridden
|
||||
from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX, DEFAULT_BUDGET, budget_is_explicit as _budget_is_explicit
|
||||
from src.model_context import budget_context_for_model
|
||||
|
||||
soft_budget = int(get_setting("agent_input_token_budget", 6000) or 0)
|
||||
soft_budget = int(get_setting("agent_input_token_budget", DEFAULT_BUDGET) or 0)
|
||||
if soft_budget > 0:
|
||||
before_trim_tokens = estimate_tokens(messages)
|
||||
reserve_tokens = min(max(max_tokens or 1024, 512), 2048)
|
||||
# Honour the configurable ceiling for the auto-derived budget path.
|
||||
# No-op when the user has an explicit `agent_input_token_budget`
|
||||
# (that branch ignores hard_max). Falls back to DEFAULT_HARD_MAX
|
||||
# on missing/malformed values so misconfig can't zero the budget.
|
||||
# Ceiling for the auto-derived budget (no effect on an explicit budget;
|
||||
# see #1230). Falls back to DEFAULT_HARD_MAX on missing/malformed values
|
||||
# so misconfig can't zero the budget.
|
||||
try:
|
||||
hard_max = int(get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX) or DEFAULT_HARD_MAX)
|
||||
except (TypeError, ValueError):
|
||||
hard_max = DEFAULT_HARD_MAX
|
||||
if hard_max <= 0:
|
||||
hard_max = DEFAULT_HARD_MAX
|
||||
# Scale the default budget to the model's context window so long-context
|
||||
# models aren't silently capped at 6000; an explicit user setting is
|
||||
# still honoured (clamped to the window). (#1170)
|
||||
# Default value = auto sentinel (scale to the window); any other value =
|
||||
# explicit cap. Value-based, not presence-based, because the save path
|
||||
# materializes defaults so a persisted default must still read as auto (#4121).
|
||||
budget_is_explicit = _budget_is_explicit(soft_budget)
|
||||
# Scale only off a window we actually discovered, bound to the value it
|
||||
# proves (else 0) — not the passed-in context_length, which can be stale
|
||||
# or unset for some callers (#4122 review).
|
||||
ctx_for_budget = budget_context_for_model(endpoint_url, model, fallback=context_length)
|
||||
effective_budget = compute_input_token_budget(
|
||||
soft_budget,
|
||||
context_length,
|
||||
is_setting_overridden("agent_input_token_budget"),
|
||||
ctx_for_budget,
|
||||
budget_is_explicit,
|
||||
hard_max=hard_max,
|
||||
)
|
||||
trimmed_messages = trim_for_context(
|
||||
@@ -2076,6 +2376,7 @@ async def stream_agent_loop(
|
||||
# signatures + consecutive no-text tool rounds to bail early.
|
||||
_recent_call_sigs = collections.deque(maxlen=6)
|
||||
_stuck_rounds = 0
|
||||
_MAX_STUCK_ROUNDS = 4 # consecutive no-progress rounds before loop-breaker bails
|
||||
# Frequency of each exact call signature (tool + args), for the runaway
|
||||
# backstop. Counting identical repeats — not distinct same-tool calls —
|
||||
# lets a legit batch (e.g. 18 calendar events at once) through.
|
||||
@@ -2096,11 +2397,12 @@ async def stream_agent_loop(
|
||||
# tool, so we don't nudge on harmless transitional text like "let me
|
||||
# know what you think".
|
||||
_INTENT_RE = re.compile(
|
||||
r"(?:^|\n)\s*(?:let me|i'?ll|i will|going to|let's)\s+"
|
||||
r"(?:^|\n)\s*(?:let me|i'?ll|i will|i need to|we need to|need to|"
|
||||
r"i should|we should|i must|we must|going to|let's)\s+"
|
||||
r"(?:tail|check|investigate|look at|see|tail|read|fetch|inspect|"
|
||||
r"verify|diagnose|examine|debug|capture|grab|pull|view|run|call|"
|
||||
r"trigger|launch|start|kick off|stop|kill|restart|adopt|serve|"
|
||||
r"register|adopt|list|search|find|query|hit|ping|test)"
|
||||
r"register|adopt|list|search|find|query|hit|ping|test|use|perform|do)"
|
||||
r"\b[^.\n]{0,140}",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
@@ -2141,9 +2443,17 @@ async def stream_agent_loop(
|
||||
elif _is_api_model:
|
||||
# Filter schemas by RAG-selected tools (if available)
|
||||
if _relevant_tools:
|
||||
# _build_base_prompt unions _ADMIN_TOOLS into the prompt
|
||||
# sections when admin intent fires — the schema list must
|
||||
# offer the same names, or the model reads prose describing
|
||||
# tools it cannot call and substitutes the nearest schema
|
||||
# it does have (e.g. manage_memory for manage_skills).
|
||||
_schema_names = set(_relevant_tools)
|
||||
if _needs_admin:
|
||||
_schema_names |= _ADMIN_TOOLS
|
||||
base_schemas = [
|
||||
s for s in FUNCTION_TOOL_SCHEMAS
|
||||
if s.get("function", {}).get("name") in _relevant_tools
|
||||
if s.get("function", {}).get("name") in _schema_names
|
||||
]
|
||||
_mcp_filtered = [
|
||||
s for s in mcp_schemas
|
||||
@@ -2489,17 +2799,22 @@ async def stream_agent_loop(
|
||||
# promise: short response (<400 chars), no fenced code/answer,
|
||||
# and an action-intent phrase was matched. Long answers that
|
||||
# happen to contain "let me know" are not stalls.
|
||||
_looks_like_promise = (
|
||||
_promise_shape = (
|
||||
not guide_only
|
||||
and _intent_match is not None
|
||||
and len(_intent_text) < 400
|
||||
and "```" not in _intent_text
|
||||
and _intent_nudge_count < _MAX_INTENT_NUDGES
|
||||
)
|
||||
_looks_like_promise = _promise_shape and _intent_nudge_count < _MAX_INTENT_NUDGES
|
||||
if _looks_like_promise:
|
||||
_intent_nudge_count += 1
|
||||
_matched_phrase = _intent_match.group(0).strip()
|
||||
logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
|
||||
# Don't log the matched phrase — it's raw model text that may
|
||||
# carry credentials. Structural metadata only.
|
||||
logger.info(
|
||||
"[agent] intent-without-action nudge #%d on round %d",
|
||||
_intent_nudge_count, round_num,
|
||||
)
|
||||
messages.append({
|
||||
"role": "system",
|
||||
"content": (
|
||||
@@ -2515,6 +2830,24 @@ async def stream_agent_loop(
|
||||
# Visible signal in the stream so the user knows we caught it.
|
||||
yield f'data: {json.dumps({"type": "agent_step", "round": round_num + 1})}\n\n'
|
||||
continue
|
||||
# The model keeps announcing actions it never takes and we've spent
|
||||
# every nudge — surface why the turn is ending instead of letting it
|
||||
# look like a clean completion.
|
||||
if _promise_shape and _intent_nudge_count >= _MAX_INTENT_NUDGES:
|
||||
_matched_phrase = _intent_match.group(0).strip()
|
||||
_matched_phrase_safe = _redact_sensitive_text(_matched_phrase)
|
||||
_in_message = (
|
||||
f"Intent-nudge cap reached on round {round_num}: the model "
|
||||
f"announced an action ({_matched_phrase_safe!r}) without a tool call "
|
||||
f"after {_intent_nudge_count} nudge(s); ending the turn."
|
||||
)
|
||||
# Do not log the matched phrase, even redacted. It is raw model
|
||||
# text and may contain credentials; keep logs structural only.
|
||||
logger.warning(
|
||||
"[agent] intent-nudge cap exhausted on round %d (%d/%d)",
|
||||
round_num, _intent_nudge_count, _MAX_INTENT_NUDGES,
|
||||
)
|
||||
yield f'data: {json.dumps({"type": "intent_nudge_exhausted", "round": round_num, "nudges": _intent_nudge_count, "max_nudges": _MAX_INTENT_NUDGES, "message": _in_message})}\n\n'
|
||||
break # no tools — done
|
||||
|
||||
# ── Loop-breaker (Terminus-style stall detector) ──────────────
|
||||
@@ -2547,10 +2880,23 @@ async def stream_agent_loop(
|
||||
# Distinct calls to one tool (a real batch) are legitimate work, so we
|
||||
# count identical call signatures, not raw per-tool-type totals.
|
||||
_runaway = _detect_runaway_call(_call_freq)
|
||||
if _stuck_rounds >= 4 or _runaway:
|
||||
if _stuck_rounds >= _MAX_STUCK_ROUNDS or _runaway:
|
||||
reason = (f"calling {_runaway} with identical arguments over and over" if _runaway
|
||||
else "repeating the same tool calls without new progress")
|
||||
logger.warning(f"[agent] loop-breaker tripped on round {round_num} ({reason}); sig={_sig[:80]!r}")
|
||||
_lb_message = (
|
||||
f"Loop-breaker stopped the agent on round {round_num}: {reason}. "
|
||||
"Forced one tool-free round to converge on an answer or state what's blocked."
|
||||
)
|
||||
# Log structural metadata only — `_sig` is raw tool-call content
|
||||
# that may carry credentials.
|
||||
logger.warning(
|
||||
"[agent] loop-breaker tripped on round %d (%s); "
|
||||
"stuck_rounds=%d/%d runaway=%r",
|
||||
round_num, reason, _stuck_rounds, _MAX_STUCK_ROUNDS, _runaway,
|
||||
)
|
||||
# Surface the stop cause to the stream so the user (and journalctl)
|
||||
# can tell a guard fired, not a clean completion.
|
||||
yield f'data: {json.dumps({"type": "loop_breaker_triggered", "round": round_num, "reason": reason, "stuck_rounds": _stuck_rounds, "max_stuck_rounds": _MAX_STUCK_ROUNDS, "runaway": _runaway, "message": _lb_message})}\n\n'
|
||||
# The model has been executing tools, so its results are already
|
||||
# in context. Force ONE tool-free round to converge: write the
|
||||
# answer from what it has, or state plainly what's blocking it.
|
||||
@@ -2629,6 +2975,10 @@ async def stream_agent_loop(
|
||||
cmd_display = block.content.split("\n")[0].strip()[:80]
|
||||
else:
|
||||
cmd_display = block.content.strip()
|
||||
# The display string is streamed (tool_start/tool_output) and persisted;
|
||||
# redact any secrets in it. block.content itself is left untouched so
|
||||
# tool execution still sees the real command.
|
||||
cmd_display = _redact_sensitive_text(cmd_display)
|
||||
|
||||
if tool_policy and tool_policy.blocks(block.tool_type):
|
||||
desc = f"{block.tool_type}: BLOCKED"
|
||||
@@ -2674,11 +3024,58 @@ async def stream_agent_loop(
|
||||
evt = await _progress_q.get()
|
||||
if evt is None:
|
||||
break
|
||||
# Redact secrets in the live tail before streaming — the
|
||||
# final tool_output is redacted, so the progress tail must
|
||||
# be too, or a secret could flash by mid-run. Copy so we
|
||||
# don't mutate the tool's own event payload.
|
||||
_evt = dict(evt)
|
||||
if isinstance(_evt.get("tail"), str):
|
||||
_evt["tail"] = _redact_sensitive_text(_evt["tail"])
|
||||
yield (
|
||||
f'data: {json.dumps({"type": "tool_progress", "tool": block.tool_type, "round": round_num, **evt})}\n\n'
|
||||
f'data: {json.dumps({"type": "tool_progress", "tool": block.tool_type, "round": round_num, **_evt})}\n\n'
|
||||
)
|
||||
desc, result = await _tool_task
|
||||
|
||||
# A skill the model just loaded can prescribe tools that weren't
|
||||
# RAG-selected this turn (declared via requires_toolsets in its
|
||||
# frontmatter). Union them into the selection so the NEXT round's
|
||||
# schema list includes them — otherwise the model reads "use
|
||||
# grep" from the skill it fetched but has no grep schema to call.
|
||||
if (
|
||||
block.tool_type == "manage_skills"
|
||||
and _relevant_tools is not None
|
||||
and not result.get("error")
|
||||
):
|
||||
_ms_args = {}
|
||||
_ms_raw = (block.content or "").strip()
|
||||
if _ms_raw.startswith("{"):
|
||||
try:
|
||||
_ms_args = json.loads(_ms_raw)
|
||||
except json.JSONDecodeError:
|
||||
_ms_args = {}
|
||||
_ms_name = str(_ms_args.get("name", "") or "").strip()
|
||||
if _ms_name and _ms_args.get("action") in ("view", "view_ref"):
|
||||
try:
|
||||
from services.memory.skills import SkillsManager as _SkM
|
||||
from src.constants import DATA_DIR as _DD
|
||||
from src.tool_policy import known_tool_names as _ktn
|
||||
_known = _ktn()
|
||||
for _sk in _SkM(_DD).load(owner=owner):
|
||||
if _sk.get("name") == _ms_name:
|
||||
_new = {
|
||||
t for t in (_sk.get("requires_toolsets") or [])
|
||||
if t in _known and t not in _relevant_tools
|
||||
}
|
||||
if _new:
|
||||
_relevant_tools.update(_new)
|
||||
logger.info(
|
||||
"[tool-rag] skill '%s' unlocked tools for next round: %s",
|
||||
_ms_name, sorted(_new),
|
||||
)
|
||||
break
|
||||
except Exception as _e:
|
||||
logger.debug(f"skill requires_toolsets unlock skipped: {_e}")
|
||||
|
||||
# Extract structured web sources from web_search tool output.
|
||||
# web_search returns {"output": ..., "exit_code": 0}; check "output"
|
||||
# first so the <!-- SOURCES:…--> marker is found and stripped even
|
||||
@@ -2701,7 +3098,7 @@ async def stream_agent_loop(
|
||||
result["results"] = _clean
|
||||
elif "stdout" in result:
|
||||
result["stdout"] = _clean
|
||||
except (json.JSONDecodeError, Exception):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Emit doc-specific event for document tools — the frontend
|
||||
@@ -2770,35 +3167,47 @@ async def stream_agent_loop(
|
||||
# empty) stdout/stderr; fall back to the error so the "timed
|
||||
# out" reason reaches the UI instead of a blank result.
|
||||
raw = result["stdout"] or result["stderr"] or result.get("error", "")
|
||||
output_text = _truncate(raw)
|
||||
output_text = _truncate(_redact_sensitive_text(raw))
|
||||
elif "output" in result:
|
||||
# bash / python canonical result: {"output": ..., "exit_code": ...}
|
||||
raw = result["output"] or ""
|
||||
output_text = _truncate(raw)
|
||||
output_text = _truncate(_redact_sensitive_text(raw))
|
||||
elif "response" in result:
|
||||
# AI interaction tools (chat_with_model, send_to_session)
|
||||
label = result.get("model", result.get("session_name", "AI"))
|
||||
output_text = _truncate(f"{label}: {result['response']}")
|
||||
output_text = _truncate(_redact_sensitive_text(f"{label}: {result['response']}"))
|
||||
elif "content" in result:
|
||||
output_text = _truncate(result["content"])
|
||||
output_text = _truncate(_redact_sensitive_text(result["content"]))
|
||||
elif "results" in result:
|
||||
output_text = _truncate(result["results"])
|
||||
output_text = _truncate(_redact_sensitive_text(result["results"]))
|
||||
elif "session_id" in result and "name" in result:
|
||||
output_text = f"Session created: {result['name']} (id: {result['session_id']})"
|
||||
elif "success" in result:
|
||||
output_text = (
|
||||
f"Written: {result.get('path', '')}"
|
||||
if result["success"]
|
||||
else f"Error: {result.get('error', '')}"
|
||||
else f"Error: {_redact_sensitive_text(result.get('error', ''))}"
|
||||
)
|
||||
elif "error" in result:
|
||||
output_text = _truncate(result["error"])
|
||||
output_text = _truncate(_redact_sensitive_text(result["error"]))
|
||||
|
||||
# Emit tool_output (include ui_event data if present)
|
||||
tool_output_data = {"type": "tool_output", "tool": block.tool_type, "command": cmd_display, "output": output_text, "exit_code": result.get("exit_code")}
|
||||
if "ui_event" in result:
|
||||
tool_output_data["ui_event"] = result["ui_event"]
|
||||
for k in ("toggle_name", "state", "mode", "model", "endpoint_url", "theme_name", "colors"):
|
||||
for k in (
|
||||
"toggle_name", "state", "mode", "model", "endpoint_url",
|
||||
"theme_name", "colors",
|
||||
# ui_control open_email_reply payload — without these the
|
||||
# frontend openReplyDraft bails on undefined uid and the
|
||||
# reply window silently never opens.
|
||||
"uid", "folder", "account_id",
|
||||
# Optional pre-filled body for open_email_reply so the
|
||||
# agent can compose-and-open in one tool call.
|
||||
"body",
|
||||
# ui_control open_panel payload
|
||||
"panel",
|
||||
):
|
||||
if k in result:
|
||||
tool_output_data[k] = result[k]
|
||||
# Forward image data from generate_image tool
|
||||
|
||||
@@ -972,16 +972,15 @@ async def do_manage_memory(content: str, session_id: Optional[str] = None, owner
|
||||
memories = [m for m in memories if m.get("category", "").lower() == category_filter]
|
||||
if not memories:
|
||||
return {"results": "No memories found" + (f" in category '{category_filter}'" if category_filter else "") + "."}
|
||||
|
||||
result_lines = [f"Found {len(memories)} memory entries:\n"]
|
||||
for m in memories[:100]:
|
||||
for m in memories:
|
||||
cat = m.get("category", "fact")
|
||||
mid = m.get("id", "?")[:8]
|
||||
text = m.get("text", "")
|
||||
if len(text) > 150:
|
||||
text = text[:150] + "..."
|
||||
result_lines.append(f"- [{cat}] `{mid}` — {text}")
|
||||
if len(memories) > 100:
|
||||
result_lines.append(f"... and {len(memories) - 100} more")
|
||||
return {"results": "\n".join(result_lines)}
|
||||
|
||||
elif action == "add":
|
||||
@@ -1293,7 +1292,7 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
|
||||
set_theme <preset> — Apply a built-in theme preset (dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute)
|
||||
create_theme <name> <bg> <fg> <panel> <border> <accent> [key=val ...] — Create custom theme. Optional key=val: advanced color overrides AND background effects: bgPattern=<none|dots|synapse|rain|constellations|perlin-flow|petals|sparkles|embers>, bgEffectColor=#RRGGBB, bgEffectIntensity=<num>, bgEffectSize=<num>, frosted=true|false
|
||||
open_panel <name> — Open a panel (documents, gallery, email, sessions, notes, memories, skills, settings, cookbook)
|
||||
open_email_reply <uid> [folder] [reply|reply-all|ai-reply] — Open a reply draft document for an email; does not send
|
||||
open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text] — Open a reply draft document for an email; does not send. ALWAYS append the body text when the user told you what to say (one-shot draft); only omit body when the user just asked to "open a reply" without content.
|
||||
get_toggles — Return current toggle states (server-side knowledge)
|
||||
"""
|
||||
lines = content.strip().split("\n")
|
||||
@@ -1537,21 +1536,54 @@ async def do_ui_control(content: str, session_id: Optional[str] = None, owner: O
|
||||
}
|
||||
|
||||
elif action == "open_email_reply":
|
||||
reply_parts = lines[0].strip().split()
|
||||
uid = reply_parts[1].strip() if len(reply_parts) > 1 else ""
|
||||
folder = reply_parts[2].strip() if len(reply_parts) > 2 else "INBOX"
|
||||
mode = reply_parts[3].strip().lower() if len(reply_parts) > 3 else "reply"
|
||||
# Two forms supported:
|
||||
# open_email_reply <uid> [folder] [reply|reply-all|ai-reply]
|
||||
# open_email_reply <uid> [folder] [reply|reply-all|ai-reply]
|
||||
# <body text on subsequent lines or after the mode token>
|
||||
# The body text (if any) gets pre-filled into the reply draft so the
|
||||
# agent can compose-and-open in one tool call instead of opening an
|
||||
# empty draft and leaving the user to wonder what happened.
|
||||
first_line = lines[0].strip()
|
||||
parts = first_line.split(maxsplit=4)
|
||||
uid = parts[1].strip() if len(parts) > 1 else ""
|
||||
folder = parts[2].strip() if len(parts) > 2 else "INBOX"
|
||||
mode = parts[3].strip().lower() if len(parts) > 3 else "reply"
|
||||
# Body: everything on the first line after the mode token, plus any
|
||||
# subsequent lines. Allows multi-line bodies.
|
||||
inline_body = parts[4] if len(parts) > 4 else ""
|
||||
rest_lines = "\n".join(lines[1:]).strip() if len(lines) > 1 else ""
|
||||
body = (inline_body + ("\n" + rest_lines if rest_lines else "")).strip()
|
||||
if not uid:
|
||||
return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply]"}
|
||||
return {"error": "open_email_reply needs: open_email_reply <uid> [folder] [reply|reply-all|ai-reply] [body text]"}
|
||||
if mode not in ("reply", "reply-all", "ai-reply"):
|
||||
mode = "reply"
|
||||
return {
|
||||
# Body is REQUIRED for the agent path. Opening an empty draft is what
|
||||
# users do by clicking the Reply button — they don't ask the agent
|
||||
# for that. Every agent invocation of open_email_reply MUST include
|
||||
# the body. Reject empty so the agent retries with the content the
|
||||
# user asked for. Exception: ai-reply mode triggers the existing
|
||||
# AI-Reply path on the frontend which generates its own body.
|
||||
if not body and mode != "ai-reply":
|
||||
return {
|
||||
"error": (
|
||||
"open_email_reply called without body. The agent path REQUIRES a body — "
|
||||
"opening an empty draft is the wrong response when the user asked you to write. "
|
||||
"Re-call with the reply text included: "
|
||||
f"`open_email_reply {uid} {folder or 'INBOX'} {mode} <your reply text here>`. "
|
||||
"Compose the reply now based on the open email's content and the user's request, "
|
||||
"then call this tool again with the body. Do NOT call create_document instead."
|
||||
),
|
||||
}
|
||||
result = {
|
||||
"ui_event": "open_email_reply",
|
||||
"uid": uid,
|
||||
"folder": folder or "INBOX",
|
||||
"mode": mode,
|
||||
"results": f"Opening reply draft for email UID {uid}",
|
||||
"results": f"Opening reply draft for email UID {uid}" + (" with pre-filled body" if body else ""),
|
||||
}
|
||||
if body:
|
||||
result["body"] = body
|
||||
return result
|
||||
|
||||
elif action == "get_toggles":
|
||||
return {
|
||||
@@ -1581,7 +1613,9 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
|
||||
"""
|
||||
import base64
|
||||
import httpx
|
||||
import os
|
||||
from pathlib import Path
|
||||
from src.url_safety import check_outbound_url
|
||||
|
||||
lines = content.strip().split("\n")
|
||||
prompt = lines[0].strip() if lines else ""
|
||||
@@ -1747,8 +1781,15 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
|
||||
|
||||
elif img.get("url"):
|
||||
# Download external URL and save locally (DALL-E returns temp URLs)
|
||||
result_url = img["url"]
|
||||
ok, reason = check_outbound_url(
|
||||
result_url,
|
||||
block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
|
||||
)
|
||||
if not ok:
|
||||
return {"error": f"Image API returned unsafe image URL: {reason}"}
|
||||
try:
|
||||
dl_resp = httpx.get(img["url"], timeout=60)
|
||||
dl_resp = httpx.get(result_url, timeout=60)
|
||||
if dl_resp.status_code == 200:
|
||||
img_dir = Path(GENERATED_IMAGES_DIR)
|
||||
img_dir.mkdir(parents=True, exist_ok=True)
|
||||
@@ -1758,10 +1799,10 @@ async def do_generate_image(content: str, session_id: Optional[str] = None, owne
|
||||
image_url = f"/api/generated-image/{filename}"
|
||||
image_id = _save_to_gallery(filename)
|
||||
else:
|
||||
image_url = img["url"] # fallback to external URL
|
||||
image_url = result_url # fallback to external URL
|
||||
except Exception as _dl_e:
|
||||
logger.warning(f"Failed to download DALL-E image: {_dl_e}")
|
||||
image_url = img["url"] # fallback to external URL
|
||||
image_url = result_url # fallback to external URL
|
||||
else:
|
||||
return {"error": "Image API returned unexpected format (no b64_json or url)"}
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ import logging
|
||||
from typing import Dict
|
||||
from cryptography.fernet import Fernet, InvalidToken
|
||||
|
||||
from core.platform_compat import safe_chmod
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class APIKeyManager:
|
||||
@@ -15,12 +17,20 @@ class APIKeyManager:
|
||||
def get_or_create_key(self) -> bytes:
    """Get or create encryption key for API keys.

    Returns:
        The raw key bytes read from ``self.key_file`` when that file exists,
        otherwise a freshly generated Fernet key that has just been written
        to ``self.key_file``.
    """
    if os.path.exists(self.key_file):
        # Older versions wrote .key with the process umask (often 0o644,
        # i.e. group/world-readable). Re-restrict on read so existing
        # installs heal without needing the key to be regenerated.
        safe_chmod(self.key_file, 0o600)
        with open(self.key_file, 'rb') as f:
            return f.read()

    key = Fernet.generate_key()
    # Create the file with owner-only permissions from the start instead of
    # writing it with the process umask and tightening afterwards; otherwise
    # the key is briefly readable by group/others. O_BINARY only exists on
    # Windows, where it prevents newline translation on the raw descriptor.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(self.key_file, flags, 0o600)
    with os.fdopen(fd, 'wb') as f:
        f.write(key)
    # This key decrypts every stored provider credential, so restrict it
    # to the owner (0o600) — it must not be group/world-readable. Still
    # needed here because O_CREAT's mode is ignored when the path already
    # existed by the time it was opened.
    safe_chmod(self.key_file, 0o600)
    return key
|
||||
|
||||
def encrypt_api_key(self, api_key: str) -> str:
|
||||
|
||||
@@ -55,6 +55,8 @@ async def _drain_agent(sess, messages):
|
||||
if "delta" in d:
|
||||
delta = d.get("delta")
|
||||
if isinstance(delta, str):
|
||||
if d.get("thinking"):
|
||||
continue
|
||||
full += delta
|
||||
elif d.get("type") == "agent_step":
|
||||
round_num = d.get("round", round_num)
|
||||
|
||||
@@ -5,12 +5,13 @@ Auto-registration of built-in MCP servers on startup.
|
||||
Each server runs as a stdio subprocess managed by McpManager.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import asyncio
|
||||
|
||||
from core.platform_compat import IS_WINDOWS, which_tool
|
||||
|
||||
@@ -197,12 +198,13 @@ def _npx_package_from_args(args):
|
||||
async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
||||
"""Probe whether an npx package is already in the local cache.
|
||||
|
||||
Runs `npx --no-install <pkg> --version`. --no-install tells npx to
|
||||
fail instead of downloading, so a cache miss returns fast. We treat
|
||||
"exited 0 with non-empty stdout" as proof of a working cached copy.
|
||||
Anything else (non-zero exit, empty stdout, timeout, missing npx,
|
||||
network error) means we should skip the server.
|
||||
First checks the local `_npx` cache for an installed package. If the
|
||||
package is not found there, falls back to `npx --no-install <pkg>
|
||||
--version` so older npm layouts still work without downloading.
|
||||
"""
|
||||
if _is_package_in_npx_cache(package_spec):
|
||||
return True
|
||||
|
||||
try:
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
npx_path, "--no-install", package_spec, "--version",
|
||||
@@ -231,3 +233,68 @@ async def _is_npx_package_cached(npx_path, package_spec, timeout_s=5):
|
||||
pass
|
||||
return False
|
||||
return proc.returncode == 0 and bool(stdout.strip())
|
||||
|
||||
|
||||
def _is_package_in_npx_cache(package_spec):
    """Report whether any npm cache root holds package_spec under `_npx`.

    The spec is first reduced to a bare package name; a spec that yields no
    name is reported as not cached. Cache roots are probed in the order
    returned by ``_npm_cache_roots`` and probing stops at the first hit.
    """
    name = _npx_package_name(package_spec)
    if not name:
        return False
    return any(
        _npx_cache_contains_package(os.path.join(root, "_npx"), name)
        for root in _npm_cache_roots()
    )
|
||||
|
||||
|
||||
def _npx_package_name(package_spec):
|
||||
"""Strip a version/range suffix from an npm package spec."""
|
||||
if not package_spec:
|
||||
return ""
|
||||
if package_spec.startswith("@"):
|
||||
parts = package_spec.split("@", 2)
|
||||
if len(parts) >= 3:
|
||||
return f"@{parts[1]}"
|
||||
return package_spec
|
||||
return package_spec.split("@", 1)[0]
|
||||
|
||||
|
||||
def _npm_cache_roots():
|
||||
roots = []
|
||||
configured = os.environ.get("npm_config_cache")
|
||||
if configured:
|
||||
roots.append(os.path.expanduser(configured))
|
||||
roots.append(os.path.join(os.path.expanduser("~"), ".npm"))
|
||||
local_app_data = os.environ.get("LOCALAPPDATA")
|
||||
if local_app_data:
|
||||
roots.append(os.path.join(local_app_data, "npm-cache"))
|
||||
return list(dict.fromkeys(roots))
|
||||
|
||||
|
||||
def _npx_cache_contains_package(npx_root, package_name):
|
||||
if not os.path.isdir(npx_root):
|
||||
return False
|
||||
package_path = os.path.join("node_modules", *package_name.split("/"), "package.json")
|
||||
try:
|
||||
entries = list(os.scandir(npx_root))
|
||||
except OSError:
|
||||
return False
|
||||
for entry in entries:
|
||||
try:
|
||||
is_dir = entry.is_dir()
|
||||
except OSError:
|
||||
continue
|
||||
cached_name = _cached_package_name(os.path.join(entry.path, package_path))
|
||||
if is_dir and cached_name == package_name:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _cached_package_name(package_json_path):
|
||||
try:
|
||||
with open(package_json_path, encoding="utf-8") as fh:
|
||||
data = json.load(fh)
|
||||
except (OSError, ValueError):
|
||||
return ""
|
||||
return str(data.get("name", "")).strip()
|
||||
|
||||
@@ -128,6 +128,17 @@ def validate_caldav_url(raw_url: str) -> str:
|
||||
return urlunparse(parsed._replace(fragment="")).rstrip("/")
|
||||
|
||||
|
||||
def _event_etag(obj) -> str:
|
||||
"""Best-effort ETag extraction from python-caldav resources."""
|
||||
try:
|
||||
etag = getattr(obj, "etag", None)
|
||||
if callable(etag):
|
||||
etag = etag()
|
||||
return str(etag or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _stable_cal_id(remote_url: str, owner: str = "", account_id: str = "") -> str:
|
||||
"""Deterministic local id for a remote CalDAV calendar, scoped to owner
|
||||
and account so two users — or one user with two accounts — pointing at
|
||||
@@ -316,11 +327,12 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
color="#5b8abf",
|
||||
source="caldav",
|
||||
account_id=account_id or None,
|
||||
caldav_base_url=remote_url,
|
||||
)
|
||||
db.add(local_cal)
|
||||
db.commit()
|
||||
else:
|
||||
# Refresh display name and stamp account_id if missing.
|
||||
# Refresh display name and stamp CalDAV metadata if missing.
|
||||
changed = False
|
||||
if local_cal.name != display_name:
|
||||
local_cal.name = display_name
|
||||
@@ -328,6 +340,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
if account_id and not local_cal.account_id:
|
||||
local_cal.account_id = account_id
|
||||
changed = True
|
||||
if local_cal.caldav_base_url != remote_url:
|
||||
local_cal.caldav_base_url = remote_url
|
||||
changed = True
|
||||
if changed:
|
||||
db.commit()
|
||||
result["calendars"] += 1
|
||||
@@ -395,6 +410,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
|
||||
existing = _find_existing_event(db, pending, uid_val, local_cal.id)
|
||||
if existing:
|
||||
if existing.caldav_sync_pending in {"create", "update"}:
|
||||
result["events"] += 1
|
||||
continue
|
||||
existing.calendar_id = local_cal.id
|
||||
existing.summary = summary
|
||||
existing.description = description
|
||||
@@ -405,6 +423,9 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
existing.is_utc = row_is_utc
|
||||
existing.rrule = rrule
|
||||
existing.origin = "caldav"
|
||||
existing.remote_href = str(getattr(obj, "url", "") or "") or None
|
||||
existing.remote_etag = _event_etag(obj) or None
|
||||
existing.caldav_sync_pending = None
|
||||
else:
|
||||
new_ev = CalendarEvent(
|
||||
uid=uid_val,
|
||||
@@ -418,6 +439,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
is_utc=row_is_utc,
|
||||
rrule=rrule,
|
||||
origin="caldav",
|
||||
remote_href=str(getattr(obj, "url", "") or "") or None,
|
||||
remote_etag=_event_etag(obj) or None,
|
||||
)
|
||||
db.add(new_ev)
|
||||
pending[uid_val] = new_ev
|
||||
@@ -442,6 +465,8 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
CalendarEvent.origin == "caldav",
|
||||
CalendarEvent.dtstart >= start,
|
||||
CalendarEvent.dtstart <= end,
|
||||
CalendarEvent.remote_href.isnot(None),
|
||||
CalendarEvent.caldav_sync_pending.is_(None),
|
||||
~CalendarEvent.uid.in_(seen_uids) if seen_uids else CalendarEvent.uid.isnot(None),
|
||||
).all()
|
||||
for ev in stale:
|
||||
@@ -458,6 +483,92 @@ def _sync_blocking(owner: str, url: str, username: str, password: str, account_i
|
||||
return result
|
||||
|
||||
|
||||
def _event_payload(ev) -> dict:
|
||||
return {
|
||||
"uid": ev.uid,
|
||||
"summary": ev.summary,
|
||||
"description": ev.description,
|
||||
"location": ev.location,
|
||||
"dtstart": ev.dtstart,
|
||||
"dtend": ev.dtend,
|
||||
"all_day": ev.all_day,
|
||||
"is_utc": ev.is_utc,
|
||||
"rrule": ev.rrule or "",
|
||||
}
|
||||
|
||||
|
||||
def _load_event_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
    """Fetch what is needed to push one event to its remote calendar.

    Looks up the first event with ``uid`` on a calendar owned by ``owner``.

    Returns:
        ``(calendar source, calendar id, event payload)``, or ``None`` when
        no such event exists, it has no calendar, or the calendar's source
        is not ``"caldav"``.
    """
    from core.database import CalendarCal, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        query = (
            session.query(CalendarEvent)
            .join(CalendarCal)
            .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
        )
        event = query.first()
        if not event:
            return None
        calendar = event.calendar
        if not calendar or calendar.source != "caldav":
            return None
        return calendar.source, calendar.id, _event_payload(event)
    finally:
        # Always release the session, including on the early returns above.
        session.close()
|
||||
|
||||
|
||||
def _load_delete_for_writeback(owner: str, uid: str) -> tuple[str, str, dict] | None:
    """Fetch what is needed to delete one event on its remote calendar.

    A deletion tombstone for ``(uid, owner)`` is checked first; when one
    exists, its ``calendar_id`` is used and the source is reported as
    ``"caldav"``. Otherwise the live event row is looked up the same way as
    for create/update.

    Returns:
        ``(source, calendar id, {"uid": uid})``, or ``None`` when there is no
        tombstone and no matching event on a ``"caldav"`` calendar.
    """
    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        deleted = (
            session.query(CalendarDeletedEvent)
            .filter(
                CalendarDeletedEvent.uid == uid,
                CalendarDeletedEvent.owner == owner,
            )
            .first()
        )
        if deleted:
            return "caldav", deleted.calendar_id, {"uid": uid}

        event = (
            session.query(CalendarEvent)
            .join(CalendarCal)
            .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
            .first()
        )
        if not event:
            return None
        calendar = event.calendar
        if not calendar or calendar.source != "caldav":
            return None
        return calendar.source, calendar.id, {"uid": uid}
    finally:
        session.close()
|
||||
|
||||
|
||||
def _pending_writeback_uids(owner: str) -> tuple[list[str], list[str]]:
    """Collect the uids that still need to be pushed for ``owner``.

    Returns:
        A pair ``(event uids, deleted uids)``. The first list holds
        non-cancelled events on the owner's ``"caldav"`` calendars that have
        a pending-sync marker or no ``remote_href``. The second holds the
        uid of every deletion tombstone recorded for the owner.
    """
    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    session = SessionLocal()
    try:
        # An event needs a push when it is flagged as pending or has no
        # remote href recorded.
        needs_push = (
            (CalendarEvent.caldav_sync_pending.isnot(None))
            | (CalendarEvent.remote_href.is_(None))
        )
        event_rows = (
            session.query(CalendarEvent.uid)
            .join(CalendarCal)
            .filter(
                CalendarCal.owner == owner,
                CalendarCal.source == "caldav",
                CalendarEvent.status != "cancelled",
                needs_push,
            )
            .all()
        )
        tombstone_rows = (
            session.query(CalendarDeletedEvent.uid)
            .filter(CalendarDeletedEvent.owner == owner)
            .all()
        )
        event_uids = [row[0] for row in event_rows]
        deleted_uids = [row[0] for row in tombstone_rows]
        return event_uids, deleted_uids
    finally:
        session.close()
|
||||
|
||||
|
||||
def _load_caldav_accounts(owner: str) -> list:
|
||||
"""Return the list of CalDAV accounts for *owner*, auto-migrating the legacy
|
||||
single-account ``caldav`` key to the new ``caldav_accounts`` list on first call.
|
||||
@@ -533,3 +644,69 @@ async def sync_caldav(owner: str) -> dict:
|
||||
for err in result.get("errors", []):
|
||||
totals["errors"].append(f"{label}: {err}")
|
||||
return totals
|
||||
|
||||
|
||||
async def push_event_create(owner: str, uid: str) -> dict:
    """Push one local event to its remote calendar.

    Returns ``{"ok": True, "skipped": True}`` when the event cannot be
    loaded for write-back; otherwise returns whatever ``writeback_event``
    returns.
    """
    target = _load_event_for_writeback(owner, uid)
    if target:
        cal_source, cal_id, event_payload = target
        from src.caldav_writeback import writeback_event

        return await writeback_event(owner, cal_source, cal_id, event_payload)
    return {"ok": True, "skipped": True}
|
||||
|
||||
|
||||
async def push_event_update(owner: str, uid: str) -> dict:
    """Push an updated event; delegates to ``push_event_create`` unchanged."""
    outcome = await push_event_create(owner, uid)
    return outcome
|
||||
|
||||
|
||||
async def push_event_delete(owner: str, uid: str) -> dict:
    """Push the deletion of one event to its remote calendar.

    Returns ``{"ok": True, "skipped": True}`` when nothing can be loaded for
    the delete; otherwise returns whatever ``writeback_event`` returns when
    called with ``delete=True``.
    """
    target = _load_delete_for_writeback(owner, uid)
    if target:
        cal_source, cal_id, event_payload = target
        from src.caldav_writeback import writeback_event

        return await writeback_event(owner, cal_source, cal_id, event_payload, delete=True)
    return {"ok": True, "skipped": True}
|
||||
|
||||
|
||||
async def push_pending_events(owner: str) -> dict:
    """Push every pending create/update and then every pending delete.

    Returns:
        ``{"events": <count>, "errors": [<"uid: message">, ...]}``. A push
        counts towards ``events`` when its result has a truthy ``ok``. A
        result that is neither ``ok`` nor ``skipped``, or an exception, adds
        an error entry truncated to 160 characters.
    """
    summary = {"events": 0, "errors": []}
    update_uids, deleted_uids = _pending_writeback_uids(owner)

    async def _attempt(push, event_uid, failure_log):
        # One push with shared bookkeeping; a failure is recorded and does
        # not stop the remaining pushes.
        # NOTE(review): a result with ok=True is counted even when it also
        # carries skipped=True — confirm that is intended.
        try:
            outcome = await push(owner, event_uid)
            if outcome.get("ok"):
                summary["events"] += 1
            elif not outcome.get("skipped"):
                summary["errors"].append(f"{event_uid}: {str(outcome.get('error') or outcome)[:160]}")
        except Exception as exc:
            logger.warning(failure_log, event_uid, exc)
            summary["errors"].append(f"{event_uid}: {str(exc)[:160]}")

    for event_uid in update_uids:
        await _attempt(push_event_update, event_uid, "CalDAV pending push failed for uid=%s: %s")
    for event_uid in deleted_uids:
        await _attempt(push_event_delete, event_uid, "CalDAV pending delete failed for uid=%s: %s")
    return summary
|
||||
|
||||
|
||||
async def sync_caldav_direction(owner: str, direction: str = "pull") -> dict:
    """Run a CalDAV sync for ``owner`` in the requested direction.

    ``direction`` is trimmed and lower-cased; a falsy value means ``"pull"``.

    Returns:
        * ``"pull"`` — the result of ``sync_caldav``.
        * ``"push"`` — the result of ``push_pending_events``.
        * ``"both"`` — ``{"push": ..., "pull": ...}``, pushing before pulling.
        * anything else — a zeroed result whose ``errors`` list names the
          unsupported direction.
    """
    mode = (direction or "pull").strip().lower()

    if mode == "both":
        # Push first, then pull.
        pushed = await push_pending_events(owner)
        pulled = await sync_caldav(owner)
        return {"push": pushed, "pull": pulled}
    if mode == "push":
        return await push_pending_events(owner)
    if mode == "pull":
        return await sync_caldav(owner)

    unsupported = {
        "calendars": 0,
        "events": 0,
        "deleted": 0,
        "errors": [f"Unsupported CalDAV sync direction: {mode}"],
    }
    return unsupported
|
||||
|
||||
@@ -89,6 +89,23 @@ def find_remote_calendar(calendars, local_cal_id: str, owner: str = "", account_
|
||||
return None
|
||||
|
||||
|
||||
def _resource_href(obj) -> str:
|
||||
try:
|
||||
return str(getattr(obj, "url", "") or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def _resource_etag(obj) -> str:
|
||||
try:
|
||||
etag = getattr(obj, "etag", None)
|
||||
if callable(etag):
|
||||
etag = etag()
|
||||
return str(etag or "")
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
owner: str = "", account_id: str = "") -> dict:
|
||||
"""Create/update (or delete) ``ev`` on the matching remote calendar.
|
||||
@@ -105,6 +122,7 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
remote = find_remote_calendar(calendars, local_cal_id, owner=owner, account_id=account_id)
|
||||
if remote is None:
|
||||
return {"ok": False, "error": "remote calendar not found"}
|
||||
remote_url = str(getattr(remote, "url", "") or "")
|
||||
|
||||
try:
|
||||
existing = remote.event_by_uid(uid)
|
||||
@@ -113,17 +131,34 @@ def push_event(calendars, local_cal_id: str, ev: dict, *, delete: bool = False,
|
||||
|
||||
if delete:
|
||||
if existing is None:
|
||||
return {"ok": True, "note": "already absent on remote"}
|
||||
return {"ok": True, "note": "already absent on remote", "calendar_url": remote_url}
|
||||
existing.delete()
|
||||
return {"ok": True}
|
||||
return {
|
||||
"ok": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(existing),
|
||||
"remote_etag": _resource_etag(existing),
|
||||
}
|
||||
|
||||
ical = build_event_ical(ev)
|
||||
if existing is not None:
|
||||
existing.data = ical
|
||||
existing.save()
|
||||
return {"ok": True, "updated": True}
|
||||
remote.save_event(ical)
|
||||
return {"ok": True, "created": True}
|
||||
return {
|
||||
"ok": True,
|
||||
"updated": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(existing),
|
||||
"remote_etag": _resource_etag(existing),
|
||||
}
|
||||
created = remote.save_event(ical)
|
||||
return {
|
||||
"ok": True,
|
||||
"created": True,
|
||||
"calendar_url": remote_url,
|
||||
"remote_href": _resource_href(created),
|
||||
"remote_etag": _resource_etag(created),
|
||||
}
|
||||
|
||||
|
||||
def _discover_calendars(client):
|
||||
@@ -154,6 +189,54 @@ def _writeback_blocking(local_cal_id, ev, delete, url, username, password,
|
||||
owner=owner, account_id=account_id)
|
||||
|
||||
|
||||
def _persist_writeback_result(owner: str, calendar_id: str, uid: str, result: dict, *, delete: bool) -> None:
    """Record the outcome of a CalDAV write-back in the local database.

    Args:
        owner: Owner used to scope the calendar, tombstone and event lookups.
        calendar_id: Local calendar id whose ``caldav_base_url`` is refreshed
            when ``result`` carries a ``calendar_url``.
        uid: Event UID; nothing is done when it is empty.
        result: Dict returned by the write-back; nothing is done when it is
            not a dict.
        delete: ``True`` when the write-back was a delete. The matching
            tombstone is then removed on success or annotated with
            ``last_error`` (truncated to 500 chars) on failure.

    For a non-delete write-back that succeeded, the event's ``remote_href`` /
    ``remote_etag`` are stored when present and ``caldav_sync_pending`` is
    cleared. Any exception is rolled back and logged; nothing is raised.
    """
    # Guard first so a no-op call does not trigger the lazy import.
    if not uid or not isinstance(result, dict):
        return

    from core.database import CalendarCal, CalendarDeletedEvent, CalendarEvent, SessionLocal

    db = SessionLocal()
    try:
        calendar = db.query(CalendarCal).filter(
            CalendarCal.id == calendar_id,
            CalendarCal.owner == owner,
        ).first()
        if calendar and result.get("calendar_url"):
            calendar.caldav_base_url = result.get("calendar_url")

        if delete:
            tombstone = db.query(CalendarDeletedEvent).filter(
                CalendarDeletedEvent.uid == uid,
                CalendarDeletedEvent.owner == owner,
            ).first()
            if result.get("ok"):
                if tombstone:
                    db.delete(tombstone)
            elif tombstone:
                tombstone.last_error = str(result.get("error") or result)[:500]
            db.commit()
            return

        event = (
            db.query(CalendarEvent)
            .join(CalendarCal)
            .filter(CalendarEvent.uid == uid, CalendarCal.owner == owner)
            .first()
        )
        if event and result.get("ok"):
            if result.get("remote_href"):
                event.remote_href = result.get("remote_href")
            if result.get("remote_etag"):
                event.remote_etag = result.get("remote_etag")
            event.caldav_sync_pending = None
        # Commit even without an event match so a refreshed calendar URL is kept.
        db.commit()
    except Exception:
        db.rollback()
        logger.exception("CalDAV write-back metadata persistence failed")
    finally:
        db.close()
|
||||
|
||||
|
||||
async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
|
||||
ev: dict, *, delete: bool = False) -> dict:
|
||||
"""Best-effort push of a local change to the remote CalDAV server.
|
||||
@@ -204,9 +287,12 @@ async def writeback_event(owner: str, calendar_source: str, calendar_id: str,
|
||||
result = await asyncio.to_thread(
|
||||
_writeback_blocking, calendar_id, ev, delete, url, user, pw, owner, acc_id
|
||||
)
|
||||
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
|
||||
if not result.get("ok"):
|
||||
logger.warning("CalDAV write-back did not apply: %s", result.get("error") or result)
|
||||
return result
|
||||
except Exception as e:
|
||||
logger.exception("CalDAV write-back raised")
|
||||
return {"ok": False, "error": str(e)[:200]}
|
||||
result = {"ok": False, "error": str(e)[:200]}
|
||||
_persist_writeback_result(owner, calendar_id, (ev or {}).get("uid", ""), result, delete=delete)
|
||||
return result
|
||||
|
||||
@@ -31,16 +31,22 @@ def compute_input_token_budget(
|
||||
|
||||
Args:
|
||||
configured: the value read from settings (may be the default).
|
||||
context_length: the model's discovered context window (0/unknown if none).
|
||||
explicit: True if the user explicitly set ``agent_input_token_budget``.
|
||||
context_length: the model's discovered context window. Pass 0 when the
|
||||
window is unknown / only a bare fallback — auto-scaling then stays
|
||||
conservative instead of trusting an unproven window (review on #4122).
|
||||
explicit: True if the user set a NON-default budget. The default value is
|
||||
the "auto" sentinel (scale to the window); any other value is an
|
||||
explicit cap. (A deliberately-chosen default can't be distinguished
|
||||
from a materialized default by value, so the default reads as auto.)
|
||||
|
||||
Rules:
|
||||
- Explicit user budget is honoured exactly, only clamped to the model's
|
||||
window when that window is known (never send more than the model holds).
|
||||
- Otherwise (default), scale to ``headroom`` of the context window, capped
|
||||
at ``hard_max`` — so long-context models use their capacity.
|
||||
- When the window is unknown, fall back to the configured/default value
|
||||
(preserving the previous behaviour).
|
||||
window when that window is known (the user's deliberate choice wins;
|
||||
``hard_max`` is an auto-budget ceiling only — see #1230).
|
||||
- Otherwise (auto), scale to ``headroom`` of the context window, capped at
|
||||
``hard_max`` — so long-context models use their capacity.
|
||||
- When the window is unknown (context_length <= 0), use the conservative
|
||||
``default`` budget and do NOT scale off the fallback.
|
||||
"""
|
||||
configured = int(configured or 0)
|
||||
context_length = int(context_length or 0)
|
||||
@@ -53,3 +59,17 @@ def compute_input_token_budget(
|
||||
return max(1, min(scaled, hard_max))
|
||||
|
||||
return configured if configured > 0 else default
|
||||
|
||||
|
||||
def budget_is_explicit(configured: int, *, default: int = DEFAULT_BUDGET) -> bool:
    """Whether a configured agent_input_token_budget is a deliberate explicit cap.

    The default value is the "auto" sentinel (scale to the model's window), so
    only a NON-default positive value counts as explicit. This keys off the
    VALUE, not settings *presence*: the settings-save path materializes every
    default into settings.json, so a persisted default must still read as auto
    (the regression #4121 / #1230 are about). Centralised here so the
    materialized-default contract is unit-testable and can't silently regress
    to a presence check.

    Args:
        configured: Budget read from settings; falsy values count as 0.
        default: The sentinel value that means "auto".

    Returns:
        ``True`` only for a positive budget that differs from ``default``.
    """
    configured = int(configured or 0)
    return configured > 0 and configured != default
|
||||
|
||||
@@ -244,9 +244,17 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
|
||||
protected_tokens = estimate_tokens(protected_msgs)
|
||||
budget -= protected_tokens
|
||||
|
||||
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo)
|
||||
essential_system = system_msgs[:1] if system_msgs else []
|
||||
extra_system = system_msgs[1:]
|
||||
# Priority: keep first system msg (preset prompt), drop others (memory, RAG, memo).
|
||||
# Exception: a research-spinoff primer (the seeded report that grounds a
|
||||
# "Discuss" chat) must never be dropped — it is the conversation's whole
|
||||
# knowledge base. Treat any system message carrying research_spinoff_from
|
||||
# metadata as essential alongside the leading system prompt.
|
||||
def _is_research_primer(m):
|
||||
return bool((m.get("metadata") or {}).get("research_spinoff_from"))
|
||||
_primers = [m for m in system_msgs if _is_research_primer(m)]
|
||||
_non_primer = [m for m in system_msgs if not _is_research_primer(m)]
|
||||
essential_system = (_non_primer[:1] if _non_primer else []) + _primers
|
||||
extra_system = _non_primer[1:]
|
||||
|
||||
# Try dropping extra system messages one by one (from the end)
|
||||
trimmed = essential_system + convo_msgs
|
||||
|
||||
@@ -136,7 +136,8 @@ async def _tick() -> None:
|
||||
return
|
||||
try:
|
||||
state = json.loads(state_path.read_text(encoding="utf-8"))
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
logger.warning("cookbook_serve_lifecycle: state file unreadable (%s), skipping tick", e)
|
||||
return
|
||||
tasks = state.get("tasks") or []
|
||||
now_ms = int(time.time() * 1000)
|
||||
@@ -160,11 +161,13 @@ async def _tick() -> None:
|
||||
# Re-read state once before writing so we capture any updates from
|
||||
# concurrent UI syncs.
|
||||
stopped_any = False
|
||||
successfully_stopped_sids = set()
|
||||
for sid, host, port in to_stop:
|
||||
ok = await _stop_serve(sid, host, port)
|
||||
logger.info(f"cookbook_serve_lifecycle: stop {sid} (host={host or 'local'}): {'ok' if ok else 'failed'}")
|
||||
if ok:
|
||||
stopped_any = True
|
||||
successfully_stopped_sids.add(sid)
|
||||
# Drop the auto-registered endpoint so the model picker and
|
||||
# the chat router don't keep pointing at a dead server.
|
||||
for t in tasks:
|
||||
@@ -178,8 +181,25 @@ async def _tick() -> None:
|
||||
if stopped_any:
|
||||
try:
|
||||
from core.atomic_io import atomic_write_json
|
||||
state["tasks"] = tasks
|
||||
atomic_write_json(state_path, state)
|
||||
# Re-read the state file so concurrent UI writes (task adds,
|
||||
# status flips, config edits) are not silently overwritten.
|
||||
# Apply only our stop mutations to the fresh snapshot.
|
||||
try:
|
||||
fresh = json.loads(state_path.read_text(encoding="utf-8"))
|
||||
fresh_tasks = fresh.get("tasks") or []
|
||||
except Exception:
|
||||
fresh = state
|
||||
fresh_tasks = tasks
|
||||
for ft in fresh_tasks:
|
||||
if not isinstance(ft, dict):
|
||||
continue
|
||||
ft_sid = ft.get("sessionId") or ft.get("id")
|
||||
if ft_sid in successfully_stopped_sids:
|
||||
ft["status"] = "stopped"
|
||||
ft["_scheduledStopAtMs"] = None
|
||||
ft["_lastStatusFlipAt"] = now_ms
|
||||
fresh["tasks"] = fresh_tasks
|
||||
atomic_write_json(state_path, fresh)
|
||||
except Exception as e:
|
||||
logger.warning(f"cookbook_serve_lifecycle: state write failed: {e}")
|
||||
|
||||
|
||||
@@ -199,11 +199,20 @@ def _fit_inline_attachment_text(
|
||||
return text[:remaining] + marker, 0
|
||||
|
||||
|
||||
def _process_office_document(path: str, display_name: str) -> str:
|
||||
def _process_office_document(
|
||||
path: str,
|
||||
display_name: str,
|
||||
session_id: str | None = None,
|
||||
auto_opened_docs: list[Dict[str, Any]] | None = None,
|
||||
owner: str | None = None,
|
||||
) -> str:
|
||||
"""Extract an Office/EPUB document to Markdown via the optional markitdown dep.
|
||||
|
||||
Falls back to a friendly banner when markitdown is unavailable or finds no
|
||||
text, so a missing optional dependency never breaks the chat path.
|
||||
text, so a missing optional dependency never breaks the chat path. When a
|
||||
session_id is provided AND the extraction succeeded, the FULL text is also
|
||||
saved as a Document so the agent can page through it via
|
||||
`manage_documents action=read offset=…` after the inline copy is capped.
|
||||
"""
|
||||
from src.markitdown_runtime import (
|
||||
is_markitdown_format,
|
||||
@@ -218,6 +227,46 @@ def _process_office_document(path: str, display_name: str) -> str:
|
||||
if markdown and markdown.strip():
|
||||
title = os.path.splitext(os.path.basename(path))[0]
|
||||
body, marker = _truncate_inline(markdown)
|
||||
|
||||
# Persist the full extracted text as a Document. The agent's existing
|
||||
# manage_documents tool can then read past the inline cap with offset.
|
||||
doc_id = None
|
||||
if session_id:
|
||||
try:
|
||||
from src.office_doc import create_office_document
|
||||
doc_id = create_office_document(
|
||||
session_id=session_id,
|
||||
upload_id=os.path.basename(path),
|
||||
title=title,
|
||||
body_text=markdown,
|
||||
)
|
||||
if doc_id and auto_opened_docs is not None:
|
||||
from src.database import SessionLocal, Document
|
||||
_db = SessionLocal()
|
||||
try:
|
||||
_d = _db.query(Document).filter(Document.id == doc_id).first()
|
||||
if _d:
|
||||
auto_opened_docs.append({
|
||||
"doc_id": _d.id,
|
||||
"title": _d.title,
|
||||
"language": _d.language,
|
||||
"content": _d.current_content,
|
||||
"version": _d.version_count,
|
||||
})
|
||||
finally:
|
||||
_db.close()
|
||||
except Exception as e:
|
||||
logger.warning("Office auto-doc creation failed for %s: %s", path, e)
|
||||
|
||||
# Upgrade the truncation marker with a hint pointing at the full doc so
|
||||
# the agent knows it can read the rest.
|
||||
if doc_id and marker:
|
||||
marker = (
|
||||
f"\n[…truncated for inline context — full {len(markdown):,} chars "
|
||||
f"saved as document `{doc_id}`. Use `manage_documents` with "
|
||||
f"action=read, document_id={doc_id}, offset=<N> to page through.]"
|
||||
)
|
||||
|
||||
return f"\n\n[Document content — {title}]:\n{body}{marker}"
|
||||
|
||||
# No content: tell the user whether to install the optional dep or whether
|
||||
@@ -521,7 +570,13 @@ def build_user_content(
|
||||
elif mime.startswith("text/") or _is_text_file(path):
|
||||
extracted_text = _process_text_file(path)
|
||||
else:
|
||||
extracted_text = _process_office_document(path, display_name)
|
||||
extracted_text = _process_office_document(
|
||||
path,
|
||||
display_name,
|
||||
session_id=session_id,
|
||||
auto_opened_docs=auto_opened_docs,
|
||||
owner=owner,
|
||||
)
|
||||
|
||||
extracted_text, inline_attachment_remaining = _fit_inline_attachment_text(
|
||||
extracted_text,
|
||||
|
||||
@@ -12,7 +12,7 @@ from typing import Optional, Tuple, Dict
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from core.database import SessionLocal, ModelEndpoint
|
||||
from src.llm_core import _detect_provider, _host_match, _ollama_api_root
|
||||
from src.llm_core import _detect_provider, _host_match, _is_kimi_code_url, KIMI_CODE_USER_AGENT, _ollama_api_root
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -183,7 +183,16 @@ def build_chat_url(base: str) -> str:
|
||||
|
||||
|
||||
def build_models_url(base: str) -> Optional[str]:
|
||||
"""Return the provider-specific model-list endpoint URL for a base."""
|
||||
"""Return the provider-specific model-list endpoint URL for a base.
|
||||
|
||||
For OpenAI-compatible servers (LM Studio, llama.cpp, vLLM,
|
||||
text-generation-webui, etc.) the model list is exposed at ``/v1/models``.
|
||||
When the user-supplied base has no path — e.g. ``http://localhost:1234`` —
|
||||
we still need to land on ``/v1/models`` (issue #25); insert the ``/v1``
|
||||
segment only when the path is empty, leaving any explicit non-empty path
|
||||
untouched (so custom prefixes like ``/openai`` or ``/api/openai/v1`` keep
|
||||
their semantics).
|
||||
"""
|
||||
base = normalize_base(resolve_url(base))
|
||||
provider = _detect_provider(base)
|
||||
if provider == "anthropic":
|
||||
@@ -192,6 +201,16 @@ def build_models_url(base: str) -> Optional[str]:
|
||||
return _ollama_api_root(base) + "/tags"
|
||||
if provider == "chatgpt-subscription":
|
||||
return None
|
||||
# Generic OpenAI-compatible fallback: local model servers with no explicit
|
||||
# path conventionally expose `/v1/models` (LM Studio, llama.cpp, vLLM).
|
||||
# For non-local unknown hosts, do not invent `/v1`; append `/models` to the
|
||||
# caller's base so look-alike provider hosts stay generic.
|
||||
parsed = urlparse(base)
|
||||
host = (parsed.hostname or "").lower()
|
||||
is_local = host in {"localhost", "127.0.0.1", "::1", "host.docker.internal"}
|
||||
uses_v1_models_by_default = is_local or host in {"api.deepseek.com"}
|
||||
if not parsed.path and uses_v1_models_by_default:
|
||||
base = base + "/v1"
|
||||
return base + "/models"
|
||||
|
||||
|
||||
@@ -215,6 +234,8 @@ def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
|
||||
if provider == "openrouter":
|
||||
headers.setdefault("HTTP-Referer", "https://github.com/pewdiepie-archdaemon/odysseus")
|
||||
headers.setdefault("X-OpenRouter-Title", "Odysseus")
|
||||
if _is_kimi_code_url(base):
|
||||
headers.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
|
||||
return headers
|
||||
|
||||
|
||||
@@ -250,27 +271,23 @@ def resolve_endpoint(
|
||||
ep_id = _stg(f"{setting_prefix}_endpoint_id")
|
||||
model = _stg(f"{setting_prefix}_model")
|
||||
|
||||
# If the specific endpoint is not configured, but the caller provided a
|
||||
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
||||
if not ep_id and setting_prefix not in ("utility", "default"):
|
||||
ep_id = _stg("utility_endpoint_id")
|
||||
model = _stg("utility_model")
|
||||
|
||||
# If the endpoint is STILL not configured, but the caller provided a
|
||||
# valid fallback (e.g. the active session model), use that immediately.
|
||||
# This prevents background tasks from jumping to the global default_model
|
||||
# when the user is mid-conversation with a different model.
|
||||
if not ep_id and fallback_url and fallback_model:
|
||||
return fallback_url, fallback_model, fallback_headers
|
||||
|
||||
# Unset Utility means "same as Default Chat Model".
|
||||
if setting_prefix == "utility" and not ep_id:
|
||||
# Unset Utility (or anything else that didn't have a fallback) means "same as Default Chat Model".
|
||||
if not ep_id:
|
||||
ep_id = _stg("default_endpoint_id")
|
||||
model = _stg("default_model")
|
||||
|
||||
# Fall back to utility model for task/research/auto-naming if not specifically configured.
|
||||
# If Utility itself is unset, the block above makes that resolve to Default Chat.
|
||||
if not ep_id and setting_prefix != "utility":
|
||||
ep_id = _stg("utility_endpoint_id")
|
||||
model = _stg("utility_model")
|
||||
if not ep_id:
|
||||
ep_id = _stg("default_endpoint_id")
|
||||
model = _stg("default_model")
|
||||
|
||||
if not ep_id:
|
||||
return fallback_url, fallback_model, fallback_headers
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import re
|
||||
from typing import Dict, List, Optional, Any
|
||||
|
||||
import httpx
|
||||
from fastapi import HTTPException
|
||||
|
||||
from core.atomic_io import atomic_write_json
|
||||
from core.platform_compat import safe_chmod
|
||||
@@ -258,6 +259,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
integration.setdefault("name", "")
|
||||
integration.setdefault("base_url", "")
|
||||
|
||||
if not isinstance(integration.get("name"), str) or not integration["name"].strip():
|
||||
raise HTTPException(400, "Integration name is required")
|
||||
if not isinstance(integration.get("base_url"), str) or not integration["base_url"].strip():
|
||||
raise HTTPException(400, "Integration base URL is required")
|
||||
|
||||
integrations = load_integrations()
|
||||
integrations.append(integration)
|
||||
save_integrations(integrations)
|
||||
@@ -266,6 +272,11 @@ def add_integration(data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
def update_integration(integration_id: str, data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||
"""Update fields on an existing integration. Returns updated integration or None."""
|
||||
if "name" in data and (not isinstance(data["name"], str) or not data["name"].strip()):
|
||||
raise HTTPException(400, "Integration name is required")
|
||||
if "base_url" in data and (not isinstance(data["base_url"], str) or not data["base_url"].strip()):
|
||||
raise HTTPException(400, "Integration base URL is required")
|
||||
|
||||
integrations = load_integrations()
|
||||
for item in integrations:
|
||||
if item.get("id") == integration_id:
|
||||
|
||||
@@ -7,6 +7,7 @@ import logging
|
||||
import hashlib
|
||||
import threading
|
||||
import re
|
||||
import os
|
||||
from fastapi import HTTPException
|
||||
from typing import Optional, Dict, List, Tuple
|
||||
from src.model_context import get_context_length, DEFAULT_CONTEXT
|
||||
@@ -22,6 +23,24 @@ class LLMConfig:
|
||||
MAX_RETRIES = 3
|
||||
RETRY_DELAY = 0.5
|
||||
STREAM_TIMEOUT = 300
|
||||
# TCP+TLS connect budget for a SINGLE attempt. The old hard-coded 3.0s
|
||||
# assumed LAN/Tailscale peers ('SYN in <100ms'); it is too tight for public
|
||||
# cloud endpoints (offshore APIs take ~0.5-1.5s cold, with jitter), so a
|
||||
# brief blip on the first connect of an idle chat surfaced as a 503 on the
|
||||
# streaming path (which, unlike llm_call, does not retry the connect). A
|
||||
# genuinely dead upstream stays bounded by the dead-host cooldown. Override
|
||||
# with env LLM_CONNECT_TIMEOUT (seconds).
|
||||
CONNECT_TIMEOUT = float(os.getenv('LLM_CONNECT_TIMEOUT', '10') or '10')
|
||||
|
||||
|
||||
def _call_timeout(read_timeout) -> httpx.Timeout:
    """Per-request timeout for non-streaming LLM calls (connect from config).

    ``read_timeout`` is coerced to ``float``; the connect budget comes from
    ``LLMConfig.CONNECT_TIMEOUT``; write and pool limits are fixed at 10 s
    and 5 s.
    """
    return httpx.Timeout(
        connect=LLMConfig.CONNECT_TIMEOUT,
        read=float(read_timeout),
        write=10.0,
        pool=5.0,
    )
|
||||
|
||||
|
||||
def _stream_timeout(read_timeout) -> httpx.Timeout:
    """Per-request timeout for streaming LLM calls (connect from config).

    ``read_timeout`` is coerced to ``float``; the connect budget comes from
    ``LLMConfig.CONNECT_TIMEOUT``; write and pool limits are fixed at 30 s
    and 5 s.
    """
    return httpx.Timeout(
        connect=LLMConfig.CONNECT_TIMEOUT,
        read=float(read_timeout),
        write=30.0,
        pool=5.0,
    )
|
||||
|
||||
|
||||
# Cache for LLM responses
|
||||
@@ -423,6 +442,146 @@ def _host_match(url: str, *domains: str) -> bool:
|
||||
return any(host == d or host.endswith("." + d) for d in domains)
|
||||
|
||||
|
||||
# Kimi Code subscription keys (api.kimi.com/coding/v1) require a whitelisted
|
||||
# coding-agent User-Agent; otherwise the API returns 403 access_terminated_error.
|
||||
# Tried in order; first success is cached per base URL for later requests.
|
||||
# Candidate User-Agent strings, probed in this order.
KIMI_CODE_USER_AGENTS: tuple[str, ...] = (
    "claude-code/0.1.0",
    "claude-code/1.0.0",
    "KimiCLI/1.0",
    "Kilo-Code/1.0",
    "Roo-Code/1.0",
    "Cursor/1.0",
)
# Default User-Agent: the first candidate.
KIMI_CODE_USER_AGENT = KIMI_CODE_USER_AGENTS[0]
# Normalized Kimi Code base URL -> User-Agent that last succeeded for it.
_kimi_code_ua_cache: dict[str, str] = {}
|
||||
|
||||
|
||||
def _is_kimi_code_url(url: str) -> bool:
    """Return ``True`` when ``url`` is on ``kimi.com`` and its path contains ``/coding``.

    An empty URL, a host that does not match ``kimi.com`` (per
    ``_host_match``), or a URL that fails to parse all yield ``False``.
    """
    if not url or not _host_match(url, "kimi.com"):
        return False
    try:
        return "/coding" in (urlparse(url).path or "")
    except Exception:
        # An unparseable URL is treated as "not Kimi Code" rather than an error.
        return False
|
||||
|
||||
|
||||
def _kimi_code_base_key(url: str) -> str:
    """Normalize a Kimi Code chat/models URL to its OpenAI base (.../coding/v1).

    Trailing slashes and a trailing ``/chat/completions``, ``/models`` or
    ``/completions`` segment are removed from the path; an empty remaining
    path becomes ``/coding/v1``. Scheme and netloc are kept as given, and
    query/fragment are dropped.
    """
    parsed = urlparse(url)
    path = (parsed.path or "").rstrip("/")
    for suffix in ("/chat/completions", "/models", "/completions"):
        if path.endswith(suffix):
            path = path[: -len(suffix)]
    path = path.rstrip("/") or "/coding/v1"
    return f"{parsed.scheme}://{parsed.netloc}{path}"
|
||||
|
||||
|
||||
def _is_kimi_code_access_denied(status: int, body: bytes | str) -> bool:
|
||||
if status != 403:
|
||||
return False
|
||||
text = body.decode("utf-8", errors="replace") if isinstance(body, bytes) else (body or "")
|
||||
lower = text.lower()
|
||||
return (
|
||||
"access_terminated_error" in lower
|
||||
or "coding agents" in lower
|
||||
or "only available for coding" in lower
|
||||
)
|
||||
|
||||
|
||||
def _kimi_code_ua_candidates(url: str) -> list[str]:
    """Return the User-Agent strings to try for ``url``, best guess first.

    Non-Kimi-Code URLs get an empty list. When a User-Agent is cached for the
    URL's base key it is placed first, followed by the remaining entries of
    ``KIMI_CODE_USER_AGENTS`` in their original order.
    """
    if not _is_kimi_code_url(url):
        return []
    base_key = _kimi_code_base_key(url)
    cached = _kimi_code_ua_cache.get(base_key)
    if cached:
        return [cached] + [ua for ua in KIMI_CODE_USER_AGENTS if ua != cached]
    return list(KIMI_CODE_USER_AGENTS)
|
||||
|
||||
|
||||
def _remember_kimi_code_user_agent(url: str, user_agent: str) -> None:
    """Cache ``user_agent`` under the normalized base key of ``url``."""
    _kimi_code_ua_cache[_kimi_code_base_key(url)] = user_agent
|
||||
|
||||
|
||||
def apply_kimi_code_headers(headers: Optional[Dict], url: str) -> Dict[str, str]:
    """Pick a Kimi Code User-Agent (cached probe when possible).

    Args:
        headers: Caller headers; copied, never mutated.
        url: Request URL. Non-Kimi-Code URLs get the copy back unchanged.

    Returns:
        A new header dict. For Kimi Code URLs ``User-Agent`` is the cached
        value, or the first candidate whose ``GET <base>/models`` probe
        returns a status below 400 (which is then cached), or, when no probe
        succeeds, ``KIMI_CODE_USER_AGENT`` unless the caller already set one.
    """
    h = dict(headers or {})
    if not _is_kimi_code_url(url):
        return h

    base_key = _kimi_code_base_key(url)
    cached = _kimi_code_ua_cache.get(base_key)
    if cached:
        h["User-Agent"] = cached
        return h

    models_url = base_key.rstrip("/") + "/models"
    from src.tls_overrides import llm_verify

    verify = llm_verify()  # loop-invariant, resolved once
    for ua in KIMI_CODE_USER_AGENTS:
        trial = dict(h)
        trial["User-Agent"] = ua
        try:
            r = httpx.get(models_url, headers=trial, timeout=8, verify=verify)
        except Exception as e:
            # A transport failure does not depend on the User-Agent; trying
            # every candidate would block for up to len(candidates) * 8 s.
            logger.debug("Kimi Code User-Agent probe failed (%s), using default", e)
            break
        if _is_kimi_code_access_denied(r.status_code, r.content):
            logger.debug("Kimi Code rejected User-Agent %s (403), trying next", ua)
            continue
        if r.status_code < 400:
            _remember_kimi_code_user_agent(url, ua)
            h["User-Agent"] = ua
            return h
        # Any other error status (not the access-denied 403) ends the probe.
        break

    h.setdefault("User-Agent", KIMI_CODE_USER_AGENT)
    return h
|
||||
|
||||
|
||||
def httpx_get_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
    """``httpx.get`` that rotates User-Agents on Kimi Code access-denied 403s.

    Non-Kimi-Code URLs are fetched once with the given headers. For Kimi Code
    URLs each candidate User-Agent is tried in turn; the first response that
    is not an access-denied 403 is returned, and its User-Agent is cached
    when the status is below 400. When every candidate is denied, the last
    response is returned.
    """
    h = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return httpx.get(url, headers=h, **kwargs)

    last = None
    for ua in _kimi_code_ua_candidates(url):
        trial = dict(h)
        trial["User-Agent"] = ua
        last = httpx.get(url, headers=trial, **kwargs)
        if not _is_kimi_code_access_denied(last.status_code, last.content):
            if last.status_code < 400:
                _remember_kimi_code_user_agent(url, ua)
            return last
    return last
|
||||
|
||||
|
||||
def httpx_post_kimi_aware(url: str, headers: Optional[Dict], **kwargs):
    """``httpx.post`` that rotates User-Agents on Kimi Code access-denied 403s.

    Non-Kimi-Code URLs are posted once with the given headers. For Kimi Code
    URLs each candidate User-Agent is tried in turn; the first response that
    is not an access-denied 403 is returned, and its User-Agent is cached
    when the status is below 400. When every candidate is denied, the last
    response is returned.
    """
    h = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return httpx.post(url, headers=h, **kwargs)

    last = None
    for ua in _kimi_code_ua_candidates(url):
        trial = dict(h)
        trial["User-Agent"] = ua
        last = httpx.post(url, headers=trial, **kwargs)
        if not _is_kimi_code_access_denied(last.status_code, last.content):
            if last.status_code < 400:
                _remember_kimi_code_user_agent(url, ua)
            return last
    return last
|
||||
|
||||
|
||||
async def httpx_post_kimi_aware_async(client, url: str, headers: Optional[Dict], **kwargs):
    """Async ``client.post`` that rotates User-Agents on Kimi Code access-denied 403s.

    Non-Kimi-Code URLs are posted once through ``client``. For Kimi Code URLs
    each candidate User-Agent is tried in turn; the first response that is
    not an access-denied 403 is returned, and its User-Agent is cached when
    the status is below 400. When every candidate is denied, the last
    response is returned.
    """
    # NOTE(review): apply_kimi_code_headers is a plain (non-async) call here;
    # confirm whether its uncached probe path is acceptable on the event loop.
    h = apply_kimi_code_headers(headers, url)
    if not _is_kimi_code_url(url):
        return await client.post(url, headers=h, **kwargs)

    last = None
    for ua in _kimi_code_ua_candidates(url):
        trial = dict(h)
        trial["User-Agent"] = ua
        last = await client.post(url, headers=trial, **kwargs)
        if not _is_kimi_code_access_denied(last.status_code, last.content):
            if last.status_code < 400:
                _remember_kimi_code_user_agent(url, ua)
            return last
    return last
|
||||
|
||||
|
||||
def _detect_provider(url: str) -> str:
|
||||
"""Detect the API provider from a configured endpoint URL.
|
||||
|
||||
@@ -446,6 +605,8 @@ def _detect_provider(url: str) -> str:
|
||||
return "groq"
|
||||
if _host_match(url, "nvidia.com"):
|
||||
return "nvidia"
|
||||
if _host_match(url, "moonshot.ai") or _host_match(url, "moonshot.cn"):
|
||||
return "moonshot"
|
||||
from src.chatgpt_subscription import is_chatgpt_subscription_base
|
||||
if is_chatgpt_subscription_base(url):
|
||||
return "chatgpt-subscription"
|
||||
@@ -542,6 +703,12 @@ def _provider_label(url: str) -> str:
|
||||
if _host_match(url, "googleapis.com"): return "Google"
|
||||
if _host_match(url, "together.xyz", "together.ai"): return "Together"
|
||||
if _host_match(url, "fireworks.ai"): return "Fireworks"
|
||||
if _host_match(url, "kimi.com"):
|
||||
try:
|
||||
if "/coding" in (urlparse(url).path or ""):
|
||||
return "Kimi Code"
|
||||
except Exception:
|
||||
pass
|
||||
if _is_ollama_native_url(url): return "Ollama"
|
||||
try:
|
||||
host = (urlparse(url).hostname or "").lower()
|
||||
@@ -682,7 +849,7 @@ def _uses_max_completion_tokens(model: str) -> bool:
|
||||
# perfectly good model as failing. For these models we omit the field and let
|
||||
# the API use its required default. (gpt-4.5 is intentionally excluded — it is
|
||||
# not a reasoning model and accepts temperature normally.)
|
||||
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5")
|
||||
_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5", "kimi-for-coding")
|
||||
|
||||
def _restricts_temperature(model: str) -> bool:
|
||||
"""Check if a model rejects any non-default temperature."""
|
||||
@@ -691,6 +858,28 @@ def _restricts_temperature(model: str) -> bool:
|
||||
m = model.lower()
|
||||
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
||||
|
||||
|
||||
# The official Moonshot API fixes temperature at 1.0 in thinking mode and 0.6
|
||||
# when thinking is explicitly disabled for Kimi K2.5/K2.6. Any other explicit
|
||||
# value returns HTTP 400. Odysseus does not currently send the `thinking` mode
|
||||
# control, so omit temperature and let Moonshot use its default thinking mode.
|
||||
# Keep the gate provider-specific: self-hosted Kimi deployments may accept
|
||||
# custom sampling values, and older Moonshot models have different defaults.
|
||||
def _moonshot_rejects_custom_temperature(provider: str, model: str) -> bool:
    """Check if the official Moonshot API fixes temperature for this model.

    Args:
        provider: Detected provider id; only ``"moonshot"`` can match.
        model: Model name; anything that is not a ``str`` yields ``False``.

    Returns:
        ``True`` when the part of ``model`` after the last ``/``, lower-cased,
        is ``kimi-k2.5`` or ``kimi-k2.6``, optionally followed by a ``-``,
        ``_`` or ``:`` suffix.
    """
    if provider != "moonshot" or not isinstance(model, str):
        return False
    # Drop any "vendor/" prefix so only the bare model id is matched.
    model_id = model.lower().rsplit("/", 1)[-1]
    return bool(re.match(r"^kimi-k2\.(?:5|6)(?:$|[-_:])", model_id))
|
||||
|
||||
|
||||
def _omit_temperature(provider: str, model: str) -> bool:
    """Check if a request should use the provider's default temperature.

    ``True`` when the model restricts temperature on its own
    (``_restricts_temperature``) or when the Moonshot-specific rule applies
    for this provider/model pair.
    """
    return _restricts_temperature(model) or _moonshot_rejects_custom_temperature(
        provider, model
    )
|
||||
|
||||
|
||||
# Anthropic removed the sampling parameters (temperature, top_p, top_k) starting
|
||||
# with Claude Opus 4.7. On Opus 4.7 and later, sending `temperature` at all —
|
||||
# even 0.0 — returns HTTP 400. Earlier Claude models (Opus 4.6 and below, every
|
||||
@@ -1138,7 +1327,7 @@ def list_model_ids(
|
||||
from src.endpoint_resolver import build_models_url
|
||||
|
||||
models_url = build_models_url(base_chat_url)
|
||||
r = httpx.get(models_url, headers=h, timeout=timeout)
|
||||
r = httpx_get_kimi_aware(models_url, h, timeout=timeout)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
|
||||
@@ -1239,14 +1428,14 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
|
||||
"messages": messages_copy,
|
||||
"temperature": temperature,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if max_tokens and max_tokens > 0:
|
||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||
payload[tok_key] = max_tokens
|
||||
try:
|
||||
note_model_activity(target_url, model)
|
||||
r = httpx.post(target_url, headers=h, json=payload, timeout=timeout)
|
||||
r = httpx_post_kimi_aware(target_url, h, json=payload, timeout=timeout)
|
||||
except Exception as e:
|
||||
raise HTTPException(502, f"POST {target_url} failed: {e}")
|
||||
if not r.is_success:
|
||||
@@ -1433,7 +1622,7 @@ async def llm_call_async(
|
||||
"messages": messages_copy,
|
||||
"temperature": temperature,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if max_tokens and max_tokens > 0:
|
||||
tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
|
||||
@@ -1446,7 +1635,7 @@ async def llm_call_async(
|
||||
if _is_host_dead(target_url):
|
||||
raise HTTPException(503, f"Upstream {_host_key(target_url)} marked unreachable (cooldown active)")
|
||||
|
||||
call_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=10.0, pool=5.0)
|
||||
call_timeout = _call_timeout(timeout)
|
||||
attempt = 0
|
||||
while attempt < max_retries:
|
||||
attempt += 1
|
||||
@@ -1454,7 +1643,7 @@ async def llm_call_async(
|
||||
try:
|
||||
note_model_activity(target_url, model)
|
||||
client = _get_http_client()
|
||||
r = await client.post(target_url, headers=h, json=payload, timeout=call_timeout)
|
||||
r = await httpx_post_kimi_aware_async(client, target_url, h, json=payload, timeout=call_timeout)
|
||||
duration = time.time() - start
|
||||
if not r.is_success:
|
||||
friendly = _format_upstream_error(r.status_code, r.text, target_url)
|
||||
@@ -1550,7 +1739,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
"temperature": temperature,
|
||||
"stream": True,
|
||||
}
|
||||
if _restricts_temperature(model):
|
||||
if _omit_temperature(provider, model):
|
||||
payload.pop("temperature", None)
|
||||
if provider not in {"openrouter", "groq"}:
|
||||
payload["stream_options"] = {"include_usage": True}
|
||||
@@ -1570,9 +1759,12 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
from src.copilot import apply_request_headers
|
||||
apply_request_headers(h, messages_copy)
|
||||
|
||||
# Short connect timeout: a reachable peer answers SYN in <100ms even on
|
||||
# Tailscale. 3s is plenty; 30s let one dead upstream wedge the UI.
|
||||
stream_timeout = httpx.Timeout(connect=3.0, read=float(timeout), write=30.0, pool=5.0)
|
||||
# Connect budget from LLMConfig.CONNECT_TIMEOUT (env LLM_CONNECT_TIMEOUT).
|
||||
# The dead-host cooldown still bounds a genuinely unreachable upstream, so a
|
||||
# wider connect budget only affects first contact and stops a brief cold
|
||||
# connect blip (offshore/public endpoints) surfacing as a 503 on this stream
|
||||
# path, which -- unlike llm_call -- does not retry the connect.
|
||||
stream_timeout = _stream_timeout(timeout)
|
||||
|
||||
if _is_host_dead(target_url):
|
||||
yield f'event: error\ndata: {json.dumps({"error": f"Upstream {_host_key(target_url)} unreachable (cooldown active)", "status": 503})}\n\n'
|
||||
@@ -1848,6 +2040,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
||||
events.append(_stream_delta_event(part))
|
||||
return events
|
||||
|
||||
h = apply_kimi_code_headers(h, target_url)
|
||||
try:
|
||||
client = _get_http_client()
|
||||
async with client.stream('POST', target_url, json=payload, headers=h, timeout=stream_timeout) as r:
|
||||
|
||||
@@ -40,15 +40,59 @@ def load_markitdown():
|
||||
return MarkItDown
|
||||
|
||||
|
||||
def _extract_docx_native(path: str) -> str | None:
|
||||
"""Pure-Python .docx text extractor — no external deps.
|
||||
|
||||
A .docx file is just a zip of XML. The body prose lives in <w:t> runs
|
||||
inside <w:p> paragraphs. Iterating with ElementTree (rather than
|
||||
re.findall) keeps paragraph breaks intact and lets the XML parser handle
|
||||
namespaces + entity unescaping. Loses tables, footnotes, images and
|
||||
list bullets — keeps ~95% of "summarize this doc" content, which is the
|
||||
case people hit when markitdown isn't installed.
|
||||
"""
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
ns = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
|
||||
try:
|
||||
with zipfile.ZipFile(path) as z:
|
||||
xml_bytes = z.read("word/document.xml")
|
||||
except (zipfile.BadZipFile, KeyError, OSError):
|
||||
return None
|
||||
try:
|
||||
root = ET.fromstring(xml_bytes)
|
||||
except ET.ParseError:
|
||||
return None
|
||||
paragraphs: list[str] = []
|
||||
for para in root.iter(f"{ns}p"):
|
||||
runs = [t.text or "" for t in para.iter(f"{ns}t")]
|
||||
line = "".join(runs).strip()
|
||||
if line:
|
||||
paragraphs.append(line)
|
||||
return "\n\n".join(paragraphs) if paragraphs else None
|
||||
|
||||
|
||||
def convert_to_markdown(path: str) -> str | None:
|
||||
"""Convert a document to Markdown text via markitdown.
|
||||
|
||||
Returns the extracted Markdown, or ``None`` if markitdown is unavailable or
|
||||
the conversion fails — callers degrade gracefully rather than erroring.
|
||||
|
||||
Fallback: when markitdown isn't installed and the file is a .docx, run
|
||||
the bundled pure-Python extractor so the most common case (Word docs)
|
||||
works out of the box. Other Office/EPUB formats still need markitdown.
|
||||
"""
|
||||
try:
|
||||
markitdown_cls = load_markitdown()
|
||||
except RuntimeError:
|
||||
if isinstance(path, str) and path.lower().endswith(".docx"):
|
||||
text = _extract_docx_native(path)
|
||||
if text:
|
||||
logger.info(
|
||||
"markitdown not installed — used native .docx extractor for %s",
|
||||
path,
|
||||
)
|
||||
return text
|
||||
logger.warning("markitdown not installed; cannot extract %s", path)
|
||||
return None
|
||||
try:
|
||||
|
||||
@@ -222,16 +222,12 @@ KNOWN_CONTEXT_WINDOWS = {
|
||||
# ---------------------------------------------------------------------------
|
||||
# Cache
|
||||
# ---------------------------------------------------------------------------
|
||||
_context_cache: Dict[Tuple[str, str], int] = {}
|
||||
_context_cache: Dict[Tuple[str, str], Tuple[int, bool]] = {}
|
||||
|
||||
|
||||
def get_context_length(endpoint_url: str, model: str) -> int:
|
||||
"""Get the context window size for a model.
|
||||
|
||||
Queries /v1/models on the endpoint and looks for context_length
|
||||
or context_window fields. Caches result per (endpoint, model).
|
||||
Falls back to DEFAULT_CONTEXT if unavailable.
|
||||
"""
|
||||
def _get_context_length_cached(endpoint_url: str, model: str) -> Tuple[int, bool]:
|
||||
"""Return (context_length, known). ``known`` is False only when the value is a
|
||||
bare DEFAULT_CONTEXT fallback (no endpoint report and not in the known table)."""
|
||||
configured_kind = _configured_endpoint_kind(endpoint_url)
|
||||
is_local = is_local_endpoint(endpoint_url)
|
||||
# Key on (endpoint_url, model): the same model id can be served by two
|
||||
@@ -242,14 +238,50 @@ def get_context_length(endpoint_url: str, model: str) -> int:
|
||||
if not is_local and cache_key in _context_cache:
|
||||
return _context_cache[cache_key]
|
||||
|
||||
ctx = _query_context_length(endpoint_url, model)
|
||||
ctx, known = _query_context_length(endpoint_url, model)
|
||||
# Only cache non-default values to allow retry on next request.
|
||||
# Local endpoints can restart with a different --max-model-len while keeping
|
||||
# the same model id, so always re-query them instead of serving stale cache.
|
||||
if not is_local and (ctx != DEFAULT_CONTEXT or configured_kind in ("api", "proxy")):
|
||||
_context_cache[cache_key] = ctx
|
||||
_context_cache[cache_key] = (ctx, known)
|
||||
logger.info(f"Context length for {model}: {ctx}")
|
||||
return ctx
|
||||
return ctx, known
|
||||
|
||||
|
||||
def get_context_length(endpoint_url: str, model: str) -> int:
    """Return the context window size for ``model`` served at ``endpoint_url``.

    Delegates to ``_get_context_length_cached`` and keeps only the window
    size, dropping the accompanying ``known`` flag. Per that helper's
    contract the value comes from the endpoint's model listing
    (``context_length`` / ``context_window``), is cached per
    (endpoint, model), and is DEFAULT_CONTEXT when nothing better is
    available.
    """
    window_and_flag = _get_context_length_cached(endpoint_url, model)
    return window_and_flag[0]
|
||||
|
||||
|
||||
def get_context_length_known(endpoint_url: str, model: str) -> Tuple[int, bool]:
    """Return ``(context_length, known)`` for ``model`` at ``endpoint_url``.

    Same lookup as ``get_context_length``, but the ``known`` flag is kept:
    it is False only when the length is the bare DEFAULT_CONTEXT fallback,
    i.e. neither reported by the endpoint nor present in the known-models
    table. Callers that *scale* a budget off the window must not trust an
    unknown value — a fallback 128K isn't proof the model holds 128K
    (review on #4122).
    """
    window_and_flag = _get_context_length_cached(endpoint_url, model)
    return window_and_flag
|
||||
|
||||
|
||||
def budget_context_for_model(endpoint_url: str, model: str, *, fallback: int = 0) -> int:
    """Pick the context window the agent input budget should scale against.

    The window and its ``known`` flag are taken from one and the same
    ``get_context_length_known`` call, so the flag always describes the value
    it travels with. Callers must not combine the flag with a length obtained
    from a different lookup, which would budget off an unproven number
    (review on #4122).

    Args:
        endpoint_url: Endpoint serving the model.
        model: Model identifier.
        fallback: Value returned when the lookup itself raises (the caller's
            best-known window); preserves prior behaviour.

    Returns:
        The window when it is proven (endpoint-reported / known table), ``0``
        when it is only the default fallback so auto-scaling stays
        conservative, or ``fallback`` when the probe raised.
    """
    try:
        window, proven = get_context_length_known(endpoint_url, model)
    except Exception:
        return fallback
    if not proven:
        return 0
    return window
|
||||
|
||||
|
||||
def _lookup_known(model: str) -> Optional[int]:
|
||||
@@ -271,8 +303,9 @@ def _lookup_known(model: str) -> Optional[int]:
|
||||
return best_ctx
|
||||
|
||||
|
||||
def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
"""Query the model API for context length."""
|
||||
def _query_context_length(endpoint_url: str, model: str) -> Tuple[int, bool]:
|
||||
"""Query the model API for context length. Returns (context_length, known) where
|
||||
``known`` is False only for the bare DEFAULT_CONTEXT fallback."""
|
||||
known = _lookup_known(model)
|
||||
api_ctx = None
|
||||
configured_kind = _configured_endpoint_kind(endpoint_url)
|
||||
@@ -283,8 +316,8 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
if configured_kind in ("api", "proxy"):
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known
|
||||
return DEFAULT_CONTEXT
|
||||
return known, True
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
# Try llama.cpp /slots endpoint first — reports actual serving context
|
||||
if is_local_endpoint(endpoint_url):
|
||||
@@ -297,7 +330,7 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
n_ctx = slots[0].get("n_ctx")
|
||||
if n_ctx and isinstance(n_ctx, int) and n_ctx > 0:
|
||||
logger.info(f"llama.cpp /slots reports n_ctx={n_ctx} for {model}")
|
||||
return n_ctx
|
||||
return n_ctx, True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -309,7 +342,8 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
if is_copilot_base(endpoint_url):
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known or DEFAULT_CONTEXT
|
||||
return known, True
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
from src.endpoint_resolver import build_models_url
|
||||
|
||||
@@ -354,18 +388,18 @@ def _query_context_length(endpoint_url: str, model: str) -> int:
|
||||
_is_local = is_local_endpoint(endpoint_url)
|
||||
if _is_local and api_ctx < known:
|
||||
logger.info(f"Local endpoint reports {api_ctx} for {model} (known max: {known}) — using API value")
|
||||
return api_ctx
|
||||
return api_ctx, True
|
||||
result = max(api_ctx, known)
|
||||
if api_ctx < known:
|
||||
logger.info(f"API reported {api_ctx} for {model}, using known {known} instead")
|
||||
return result
|
||||
return result, True
|
||||
if api_ctx:
|
||||
return api_ctx
|
||||
return api_ctx, True
|
||||
if known:
|
||||
logger.info(f"Using known context window for {model}: {known}")
|
||||
return known
|
||||
return known, True
|
||||
|
||||
return DEFAULT_CONTEXT
|
||||
return DEFAULT_CONTEXT, False
|
||||
|
||||
|
||||
def estimate_tokens(messages: List[Dict]) -> int:
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
"""Auto-create a Document row from an Office attachment.
|
||||
|
||||
When a .docx (and friends) lands in chat, the full extracted text is stored
|
||||
as a Document so the agent can page through it with `manage_documents
|
||||
action=read offset=…` even after the inline chat payload was capped. Mirrors
|
||||
the PDF auto-doc pattern in `src.pdf_form_doc`.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_office_document(
    session_id: str,
    upload_id: str,
    title: str,
    body_text: Optional[str] = None,
) -> Optional[str]:
    """Create a markdown Document for an Office attachment and set it active.

    Stores ``body_text`` both as the Document's ``current_content`` and as
    version 1 of its history, so the agent can fetch arbitrary windows via
    `manage_documents action=read` even when the inline chat copy was
    truncated.

    Args:
        session_id: Chat session the document belongs to; the document's
            owner is copied from that session row when it exists.
        upload_id: Identifier of the originating upload (accepted for
            interface compatibility; not used by this function).
        title: Title given to the new document.
        body_text: Full extracted text of the attachment.

    Returns:
        The new document id, or ``None`` when ``body_text`` is empty or
        whitespace-only, or when the rows could not be persisted. A failure
        to mark the already-persisted document as active is logged but does
        not turn the result into ``None``.
    """
    # Cheap guard first: with nothing to store there is no reason to pull in
    # the database layer at all.
    if not body_text or not body_text.strip():
        return None

    from src.database import (
        SessionLocal,
        Document,
        DocumentVersion,
        Session as DbSession,
    )
    from src.agent_tools.document_tools import set_active_document

    db = SessionLocal()
    try:
        doc_id = str(uuid.uuid4())
        ver_id = str(uuid.uuid4())
        sess = db.query(DbSession).filter(DbSession.id == session_id).first()
        doc = Document(
            id=doc_id,
            session_id=session_id,
            title=title,
            language="markdown",
            current_content=body_text,
            version_count=1,
            is_active=True,
            owner=sess.owner if sess else None,
        )
        ver = DocumentVersion(
            id=ver_id,
            document_id=doc_id,
            version_number=1,
            content=body_text,
            summary="Imported from Office attachment",
            source="upload",
        )
        db.add(doc)
        db.add(ver)
        db.commit()
        # The rows are durable from here on. If activation fails, still report
        # the id: returning None would tell the caller nothing was created
        # while the document actually exists.
        try:
            set_active_document(doc_id)
        except Exception:
            logger.warning(
                "Office document %s created but could not be set active",
                doc_id,
                exc_info=True,
            )
        return doc_id
    except Exception as e:
        db.rollback()
        # logger.exception keeps the original message and adds the traceback.
        logger.exception("Failed to create office document: %s", e)
        return None
    finally:
        db.close()
|
||||
@@ -0,0 +1,32 @@
|
||||
"""Compatibility helpers for optional third-party dependencies."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import types
|
||||
|
||||
|
||||
def patch_realesrgan_torchvision_compat() -> None:
    """Register a stand-in for ``torchvision.transforms.functional_tensor``.

    BasicSR/Real-ESRGAN import ``rgb_to_grayscale`` from that module path.
    When nothing is registered under that name yet, a synthetic module is
    placed in ``sys.modules`` that exposes ``rgb_to_grayscale`` and forwards
    every other attribute lookup to ``torchvision.transforms.functional``.

    Does nothing when the name is already registered, when
    ``torchvision.transforms.functional`` cannot be imported, or when it has
    no ``rgb_to_grayscale``.
    """
    target = "torchvision.transforms.functional_tensor"
    if target in sys.modules:
        return

    try:
        from torchvision.transforms import functional
    except Exception:
        # torchvision missing or broken: there is nothing to shim against.
        return

    grayscale = getattr(functional, "rgb_to_grayscale", None)
    if grayscale is None:
        return

    def _forward(name):
        # Module-level __getattr__: resolve anything else on `functional`.
        return getattr(functional, name)

    stand_in = types.ModuleType(target)
    stand_in.rgb_to_grayscale = grayscale
    stand_in.__getattr__ = _forward
    sys.modules[target] = stand_in
|
||||
|
||||
|
||||
def prepare_optional_dependency_import(name: str) -> None:
    """Run any import-time compatibility shim known for package ``name``.

    Only ``"realesrgan"`` has a shim (the torchvision import-path patch);
    every other name is a no-op.
    """
    if name != "realesrgan":
        return
    patch_realesrgan_torchvision_compat()
|
||||
@@ -0,0 +1,78 @@
|
||||
"""Server-side mirror of the built-in characters used for reminder synthesis.
|
||||
|
||||
The frontend ships these in static/js/presets.js (PROMPT_TEMPLATES with
|
||||
isCharacter:true). The Reminders → AI Synthesis card writes only the
|
||||
persona ID into settings; the synthesis route in note_routes.py needs
|
||||
the full prompt text to bias the utility model's voice. Keeping a small
|
||||
local mirror avoids having the client send the prompt over the wire on
|
||||
every reminder fire.
|
||||
|
||||
If the user picks a custom character (id == "custom") we fall back to
|
||||
the warm-neutral baseline — custom prompts live in browser localStorage
|
||||
and aren't visible to the server.
|
||||
"""
|
||||
|
||||
# Prompt text for each built-in character, keyed by lower-case persona id.
PERSONAS = {
    "socrates": (
        "Never answer directly. Respond only with questions — sharp, layered, "
        "Socratic. Expose contradictions. Make the person argue with themselves "
        "until the truth falls out. Use irony like a scalpel. Be genuinely "
        "curious, never condescending."
    ),
    "razor": (
        "Strip everything to the bone. No filler, no hedging, no pleasantries. "
        "Answer in the fewest words possible. If one sentence works, don't use "
        "two. If a word adds nothing, cut it. Blunt, precise, surgical."
    ),
    "nietzsche": (
        "Think and respond through the lens of Nietzsche. Analyze every "
        "question in terms of will to power, self-overcoming, eternal "
        "recurrence, ressentiment, value-creation, and master-slave morality. "
        "Write with aphoristic force — sharp, compressed, vivid, and "
        "unapologetic — but do not sacrifice depth for style. Favor "
        "life-affirmation, discipline, courage, style, rank, self-overcoming, "
        "and amor fati over nihilism, conformity, ressentiment, and self-pity."
    ),
    "spark": (
        "You are Spark, a playful, quick-witted assistant with bright energy "
        "and practical instincts. Keep responses concise, vivid, and helpful. "
        "Be warm without being cloying, imaginative without losing the thread, "
        "and always center the user's actual goal. Use a light, lively voice "
        "with occasional clever turns of phrase."
    ),
    "odysseus": (
        "You are Odysseus, king of Ithaca — subtle in counsel, disciplined in "
        "judgment, and unmatched in strategic cunning. Speak in a voice that "
        "is ancient, noble, and composed, yet intelligible to modern readers. "
        "Be eloquent but not flowery. Be wise but not vague. Speak as one who "
        "has weathered storms and taken back his house by wit, timing, and "
        "resolve."
    ),
}


# Baseline system prompt used when no built-in persona applies.
_DEFAULT_SYNTHESIS_TONE = (
    "You write short, warm, one-line reminders. The user has set a note for "
    "themselves and the moment to remember has arrived. Keep it under 18 "
    "words. Be human, gentle, and direct — never robotic."
)


def synthesis_system_prompt(persona_id: str) -> str:
    """Build the reminder-synthesis system prompt for ``persona_id``.

    The id is matched against ``PERSONAS`` after trimming whitespace and
    lower-casing. A match yields that persona's prompt followed by the
    one-line-reminder instruction; anything else (empty, unknown, or a
    client-only custom character) yields the warm-neutral baseline.
    """
    key = (persona_id or "").strip().lower()
    voice = PERSONAS.get(key)
    if not voice:
        return _DEFAULT_SYNTHESIS_TONE

    # The persona sets the voice; the trailing instruction keeps the model
    # focused on writing a short reminder rather than a chat reply.
    reminder_instruction = (
        "You are now writing a single one-line reminder for the user. "
        "Keep it under 18 words and in the voice above."
    )
    return voice + "\n\n" + reminder_instruction
|
||||
@@ -29,7 +29,15 @@ def _invalidate_caches():
|
||||
# ── Default values ──
|
||||
|
||||
DEFAULT_SETTINGS = {
|
||||
"image_gen_enabled": True,
|
||||
# Agent email safety: when True, the MCP send_email / reply_to_email
|
||||
# tools don't SMTP directly. They stage the composed message into the
|
||||
# scheduled_emails table with status='agent_draft' and return a
|
||||
# pending_id + the rendered email so the user can review and approve
|
||||
# (or cancel) before it actually goes out. Default ON because models
|
||||
# have been observed inventing signatures and sending to real
|
||||
# recipients without confirmation.
|
||||
"agent_email_confirm": True,
|
||||
"image_gen_enabled": False,
|
||||
"image_model": "",
|
||||
"image_quality": "medium",
|
||||
"vision_model": "",
|
||||
@@ -101,14 +109,22 @@ DEFAULT_SETTINGS = {
|
||||
"research_run_timeout_seconds": 1800,
|
||||
"agent_max_tool_calls": 0,
|
||||
"agent_max_rounds": 20, # per-message agent step cap (clamped 1..200)
|
||||
# Soft input-token budget for the agent loop. The DEFAULT value (6000) is the
|
||||
# "auto" sentinel: it means "scale the budget to the model's context window"
|
||||
# (#1230) — so long-context models aren't capped at 6000. Set ANY OTHER value
|
||||
# to enforce an explicit cap (clamped to the window only — hard_max does not
|
||||
# apply to explicit budgets, #1230); set 0 to disable soft-trimming. The
|
||||
# default is treated as auto because the settings-save path materializes
|
||||
# defaults, so a persisted 6000 can't be told apart from a deliberate 6000 —
|
||||
# to pin a budget near the default, use a nearby value (e.g. 5999).
|
||||
"agent_input_token_budget": 6000,
|
||||
# Ceiling on the *auto-derived* input budget that #1230 introduced. Has
|
||||
# no effect when `agent_input_token_budget` is explicitly set (the user's
|
||||
# value is honoured regardless). Default matches
|
||||
# `src.context_budget.DEFAULT_HARD_MAX`; lower this for cost-paranoid
|
||||
# setups, raise it on premium APIs with very large windows that you
|
||||
# Ceiling on the *auto-derived* input budget; a configurable setting since #1273
|
||||
# (the merged #1230 left it a module constant). No effect on an explicit budget
|
||||
# — a deliberate value is honoured (#1230). Default matches
|
||||
# `src.context_budget.DEFAULT_HARD_MAX`; lower this for
|
||||
# cost-paranoid setups, raise it on premium APIs with very large windows you
|
||||
# want to actually use (e.g. 900_000 to fill a 1M-context model). See
|
||||
# `compute_input_token_budget` in src/context_budget.py.
|
||||
# `compute_input_token_budget`.
|
||||
"agent_input_token_hard_max": 200_000,
|
||||
"agent_stream_timeout_seconds": 300,
|
||||
# Extra directory roots that read_file / write_file may access, in
|
||||
@@ -143,6 +159,7 @@ DEFAULT_SETTINGS = {
|
||||
# Reminders
|
||||
"reminder_channel": "browser", # "browser" | "email" | "ntfy" | "webhook"
|
||||
"reminder_llm_synthesis": False,
|
||||
"reminder_llm_persona": "",
|
||||
"reminder_ntfy_topic": "Reminders",
|
||||
"reminder_email_to": "",
|
||||
# Generic outbound webhook channel: pick any saved Integration as the
|
||||
@@ -223,8 +240,10 @@ def is_setting_overridden(key: str) -> bool:
|
||||
|
||||
``load_settings`` merges DEFAULT_SETTINGS with the saved file, so a value
|
||||
equal to its default is indistinguishable from "never set" via get_setting.
|
||||
Callers that need to treat an explicit user choice differently from the
|
||||
default (e.g. adaptive budgets) use this to read the raw saved file.
|
||||
Callers that must distinguish an explicit user choice from a default read
|
||||
the raw saved file via this. (Note: a materialized default is also "present",
|
||||
so value-sensitive callers should compare against the default — see
|
||||
``context_budget.budget_is_explicit``.)
|
||||
"""
|
||||
try:
|
||||
with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
|
||||
|
||||
@@ -1338,11 +1338,24 @@ class TaskScheduler:
|
||||
return await self._execute_checkin(task, crew, db, session_id, endpoint_url, model)
|
||||
|
||||
# Build system prompt: crew member persona overrides the default.
|
||||
# Built-in character_id (Socrates, Razor, etc.) further biases the
|
||||
# voice — it prepends to whichever base prompt we landed on so the
|
||||
# task still knows it's executing a scheduled task but in that
|
||||
# character's tone.
|
||||
system_prompt = (
|
||||
(crew.personality or "").strip()
|
||||
if crew and crew.personality
|
||||
else "You are a helpful assistant executing a scheduled task. Use available tools to complete the task thoroughly."
|
||||
)
|
||||
char_id = (getattr(task, "character_id", None) or "").strip()
|
||||
if char_id:
|
||||
try:
|
||||
from src.reminder_personas import PERSONAS as _PERSONAS
|
||||
char_prompt = _PERSONAS.get(char_id.lower())
|
||||
if char_prompt:
|
||||
system_prompt = f"{char_prompt}\n\n{system_prompt}"
|
||||
except Exception:
|
||||
pass
|
||||
# Inject current time so the model knows what's past vs upcoming
|
||||
tz_name = _resolve_task_timezone(db, task)
|
||||
try:
|
||||
@@ -1649,6 +1662,8 @@ class TaskScheduler:
|
||||
data = json.loads(event_str[6:])
|
||||
# Capture text from all event types, not just delta
|
||||
if "delta" in data:
|
||||
if data.get("thinking"):
|
||||
continue
|
||||
full_text += data["delta"]
|
||||
elif data.get("type") == "tool_output":
|
||||
# Tool results — capture summary so we have SOMETHING even
|
||||
|
||||
@@ -42,7 +42,7 @@ _SOTA_HOSTS = frozenset({
|
||||
"api.together.xyz", "api.fireworks.ai",
|
||||
"api.perplexity.ai", "api.x.ai",
|
||||
"generativelanguage.googleapis.com", "api.groq.com",
|
||||
"openrouter.ai", "ollama.com", "api.venice.ai",
|
||||
"openrouter.ai", "ollama.com", "api.venice.ai", "api.kimi.com",
|
||||
})
|
||||
|
||||
|
||||
@@ -594,6 +594,8 @@ async def run_teacher_inline(
|
||||
"exit_code": payload.get("exit_code"),
|
||||
})
|
||||
if "delta" in payload and isinstance(payload["delta"], str):
|
||||
if payload.get("thinking"):
|
||||
continue
|
||||
captured_text_parts.append(payload["delta"])
|
||||
yield 'data: ' + json.dumps(payload) + '\n\n'
|
||||
continue
|
||||
|
||||
@@ -18,6 +18,40 @@ from core.constants import internal_api_base
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Active email state
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# When the user has an email reader window open, the frontend tells the
|
||||
# backend about it on each chat submit. Email tools can resolve "this email"
|
||||
# without guessing a UID. Cleared between requests by chat_routes.
|
||||
# Reference to the email currently open in the UI, or None when there is none.
_active_email_ref: Optional[Dict[str, str]] = None


def set_active_email(uid: Optional[str], folder: Optional[str] = None, account: Optional[str] = None,
                     subject: Optional[str] = None, sender: Optional[str] = None) -> None:
    """Remember which email is open in the UI; a falsy ``uid`` clears it.

    Every stored value is coerced to ``str``. A missing folder is recorded as
    "INBOX"; missing account, subject and sender are recorded as "". The
    sender is stored under the key "from".
    """
    global _active_email_ref
    if uid:
        fields = (
            ("uid", uid),
            ("folder", folder or "INBOX"),
            ("account", account or ""),
            ("subject", subject or ""),
            ("from", sender or ""),
        )
        _active_email_ref = {key: str(value) for key, value in fields}
    else:
        _active_email_ref = None


def get_active_email() -> Optional[Dict[str, str]]:
    """Return the stored active-email reference, or None when none is set."""
    return _active_email_ref


def clear_active_email() -> None:
    """Forget the active email."""
    set_active_email(None)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argument parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1445,7 +1479,15 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
"""Handle manage_calendar tool calls: list/create/update/delete calendar events (local SQLite)."""
|
||||
from datetime import datetime, timedelta
|
||||
from core.database import SessionLocal, CalendarCal, CalendarEvent, Note
|
||||
from routes.calendar_routes import _ensure_default_calendar, _parse_dt, _parse_dt_pair, parse_due_for_user, _resolve_base_uid
|
||||
from routes.calendar_routes import (
|
||||
_ensure_default_calendar,
|
||||
_parse_dt,
|
||||
_parse_dt_pair,
|
||||
parse_due_for_user,
|
||||
_resolve_base_uid,
|
||||
_push_caldav_event_after_commit,
|
||||
_record_caldav_delete_tombstone,
|
||||
)
|
||||
import uuid as _uuid
|
||||
|
||||
try:
|
||||
@@ -1537,10 +1579,10 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
text = str(raw).strip().lower()
|
||||
if text in {"none", "no", "off", "false"}:
|
||||
return None
|
||||
m = re.search(r"(\d+)\s*(?:m|min|minute|minutes)\b", text)
|
||||
m = re.search(r"(\d+)\s*(?:minutes?|mins?|m)\b", text)
|
||||
if m:
|
||||
return max(0, int(m.group(1)))
|
||||
m = re.search(r"(\d+)\s*(?:h|hr|hour|hours)\b", text)
|
||||
m = re.search(r"(\d+)\s*(?:hours?|hrs?|h)\b", text)
|
||||
if m:
|
||||
return max(0, int(m.group(1)) * 60)
|
||||
if text.isdigit():
|
||||
@@ -1553,7 +1595,7 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
return desc
|
||||
reminder_only = re.compile(
|
||||
r"^\s*(?:remind(?:er)?|alarm)\s*:?\s*\d+\s*"
|
||||
r"(?:m|min|minute|minutes|h|hr|hour|hours)\b.*$",
|
||||
r"(?:minutes?|mins?|m|hours?|hrs?|h)\b.*$",
|
||||
re.I,
|
||||
)
|
||||
return "" if reminder_only.match(desc) else desc
|
||||
@@ -1643,6 +1685,9 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
except ValueError as e:
|
||||
return {"error": f"Invalid date format: {e}", "exit_code": 1}
|
||||
|
||||
if end_dt <= start_dt:
|
||||
end_dt = start_dt + timedelta(days=1)
|
||||
|
||||
q = _event_query().filter(
|
||||
CalendarEvent.dtstart < end_dt,
|
||||
CalendarEvent.dtend > start_dt,
|
||||
@@ -1822,6 +1867,7 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
rrule=args.get("rrule", "") or "",
|
||||
event_type=event_type,
|
||||
importance=importance,
|
||||
caldav_sync_pending="create" if cal.source == "caldav" else None,
|
||||
)
|
||||
db.add(ev)
|
||||
reminder_note_id = None
|
||||
@@ -1836,6 +1882,8 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
dtstart_is_utc and not all_day,
|
||||
)
|
||||
db.commit()
|
||||
if cal.source == "caldav":
|
||||
await _push_caldav_event_after_commit(owner, uid, "create")
|
||||
tag_blurb = f" [{event_type}]" if event_type else ""
|
||||
if minutes_before is None:
|
||||
reminder_blurb = ""
|
||||
@@ -1893,7 +1941,12 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
ev.event_type = _tag or None
|
||||
if args.get("importance") is not None:
|
||||
ev.importance = args["importance"]
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav"
|
||||
if is_caldav:
|
||||
ev.caldav_sync_pending = "update"
|
||||
db.commit()
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "update")
|
||||
return {"response": f"Updated event {uid}", "exit_code": 0}
|
||||
|
||||
elif action == "delete_event":
|
||||
@@ -1907,8 +1960,13 @@ async def do_manage_calendar(content: str, owner: Optional[str] = None) -> Dict:
|
||||
ev = _event_query().filter(CalendarEvent.uid == base_uid).first()
|
||||
if not ev:
|
||||
return {"error": f"Event {uid} not found", "exit_code": 1}
|
||||
is_caldav = ev.calendar and ev.calendar.source == "caldav" and ev.remote_href
|
||||
if is_caldav:
|
||||
_record_caldav_delete_tombstone(db, ev, owner)
|
||||
db.delete(ev)
|
||||
db.commit()
|
||||
if is_caldav:
|
||||
await _push_caldav_event_after_commit(owner, base_uid, "delete")
|
||||
return {"response": f"Deleted event {uid}", "exit_code": 0}
|
||||
|
||||
else:
|
||||
|
||||
@@ -88,14 +88,14 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
|
||||
"pipeline": "Run a multi-step AI pipeline with multiple models. Chain tasks together in sequence.",
|
||||
"list_models": "List all available AI models and their endpoints.",
|
||||
"manage_session": "Chat management: rename, archive, delete, or fork chats (the UI calls these 'chats'; internally 'sessions'). Use for 'rename my chats', 'rename this chat', 'archive/delete a chat'.",
|
||||
"manage_memory": "Memory management: list, add, edit, delete, or search persistent memories.",
|
||||
"manage_memory": "Memory management: list, add, edit, delete, or search persistent memories. For facts about the USER (their name, preferences, where they live). NOT for info about ANOTHER person — addresses, phones, emails belonging to a contact go in manage_contact, not memory.",
|
||||
"manage_skills": "Skill management: add, update, publish, or search reusable skills/presets.",
|
||||
"manage_tasks": "Scheduled task management: list, create, edit, delete, pause, resume, or run cron tasks.",
|
||||
"manage_endpoints": "Endpoint management: list, add, delete, enable, or disable model API endpoints.",
|
||||
"manage_mcp": "MCP server management: list, add, delete, reconnect servers, or list available tools.",
|
||||
"manage_webhooks": "Webhook management: list, add, delete, enable, or disable webhooks.",
|
||||
"manage_tokens": "API token management: list, create, or delete API access tokens.",
|
||||
"manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content. action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
|
||||
"manage_documents": "List, read, delete, or tidy documents in the editor panel. action='list' returns clickable rows (most-recent first) so the user can open any doc by clicking. action='read' (aka view/open/get) with document_id returns the content; supports offset=<N> + limit=<N> to page through large docs (response includes next_offset when more remains, so you can keep calling with offset=next_offset). action='delete' with document_id removes a doc (only way to delete). Use this for ANY 'show/read/list/open my documents/docs/files/notes' request — never shell or curl.",
|
||||
"manage_research": "List, read/open, or delete saved DEEP RESEARCH results from the Library. action='list' returns clickable [query](#research-<id>) rows (most-recent first). action='read' (aka open/view/get) with id returns the report + sources. action='delete' with id removes it. Use this for ANY 'open/read/find/delete my research / that report / the research on X' request. NOTE: this is for EXISTING research; to START new research use trigger_research.",
|
||||
"manage_settings": "Change ANY real app setting (the ones the Settings panel writes) so the user never has to open it: TTS voice/provider/speed, STT, search engine + result count, default/teacher/task/utility/vision/image/research models, image quality, reminder channel (browser/email/ntfy), agent timeout/tool-call budget, and more. action=set with key (friendly aliases ok: voice, 'search engine', 'default model', 'teacher model', 'image quality', 'reminder channel'...) + value; get/list/reset too. Also toggles tools on/off (disable_tool/enable_tool/list_tools). Secrets/API keys are read-only. Use for any 'change my…/set my…/use X for…/turn on…' preference request.",
|
||||
"create_session": "Create a new chat with a name and model.",
|
||||
@@ -104,7 +104,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
|
||||
"search_chats": "Search past session transcripts across chats.",
|
||||
"ask_user": "Ask the user a multiple-choice question to get a decision or clarification. Use this when the task is genuinely ambiguous and the answer changes what you do next — pick between approaches, confirm an assumption, choose among options — instead of guessing. Provide a clear `question` and 2-6 `options` (each with a short `label`, optional `description`). Calling this ENDS your turn: the user sees clickable buttons and their choice arrives as your next message. Don't use it for things you can decide from context or sensible defaults, or for irreversible-action confirmation if a dedicated flow exists.",
|
||||
"update_plan": "Write back to the ACTIVE PLAN while executing an approved plan: mark steps done or revise them. After finishing a step call this with the full checklist and that step marked done; when the user asks to change the plan call it with the revised checklist. Always pass the COMPLETE markdown checklist (`- [ ]` / `- [x]`), not a diff. The user's docked plan window updates live. No effect when there is no active plan.",
|
||||
"ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
|
||||
"ui_control": "Control the UI and toggle tools on/off. Use this to turn off / turn on / disable / enable individual tools and features: shell (bash), search (web), research, browser, documents, incognito. Open panels (documents library, gallery, email inbox, sessions, notes, memories/brain, skills, settings, cookbook) via `open_panel <name>`. Use `open_email_reply <uid> <folder> reply` to open an email reply draft document without sending. To pre-fill the reply body in one shot (USE THIS whenever the user told you what to say — opening an empty draft when they asked you to write is wrong), append the body after the mode: `open_email_reply <uid> <folder> reply <body text>`. Body can continue on subsequent lines for multi-line replies. Also switches between chat/agent modes, changes the current model, and applies/creates themes.",
|
||||
"list_email_accounts": "List configured email accounts and default status. Use before reading or sending mail when the user mentions Gmail, work mail, custom domain mail, another mailbox, or asks to compare/check multiple inboxes.",
|
||||
"list_emails": "List emails for a folder/account, newest first, including read messages by default. Shows subject, sender, date, UID, account, and AI summary. Check inbox, find emails needing replies. Supports account from list_email_accounts for Gmail/work/custom mailboxes. For last/latest/newest email, use max_results=1 and unread_only=false.",
|
||||
"read_email": "Read the full content of a specific email by UID or Message-ID. View email body, check details. Supports account from list_email_accounts when the UID belongs to a non-default mailbox.",
|
||||
@@ -115,7 +115,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
|
||||
"mark_email_read": "Mark an email as read or unread by toggling the \\Seen flag.",
|
||||
"bulk_email": "Perform one action on many emails at once. Use for delete all those, archive these, mark all read, move spam to junk. Takes explicit UIDs from list_emails or all_unread=true. Always pass account for Gmail/work/custom mailbox results.",
|
||||
"resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.",
|
||||
"manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. Do not use for user identity facts like 'my name is <name>'; those are memory.",
|
||||
"manage_contact": "Save / update / delete / list address-book contacts (CardDAV). Use for info about ANOTHER person — name, email, phone, postal address. Args: action=list|add|update|delete, name, email, phones, address, uid (from list). For 'save this for <person>' / address pastes / phone numbers next to a name, this is the right tool — NOT manage_memory. Do NOT use for facts about the USER ('my name is X'); those are manage_memory.",
|
||||
"manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.",
|
||||
"manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
|
||||
"download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.",
|
||||
@@ -372,7 +372,19 @@ class ToolIndex:
|
||||
{"resolve_contact", "manage_contact"},
|
||||
frozenset({"save contact", "add contact", "new contact", "update contact",
|
||||
"edit contact", "delete contact", "remove contact",
|
||||
"save this person", "add to contacts", "save to contacts"}):
|
||||
"save this person", "add to contacts", "save to contacts",
|
||||
# "add <name> to (my) contacts" — words between 'add' and
|
||||
# 'contacts' break the literal phrase match above, so anchor
|
||||
# on the tail.
|
||||
"to my contacts", "to contacts", "to address book",
|
||||
# "save this for <person>" / "save it for <person>" — the user
|
||||
# is storing info on a known person without using the literal
|
||||
# word 'contact'. Catches the address/phone-paste pattern.
|
||||
"save this for", "save it for", "save for",
|
||||
"save this one for", "save that for",
|
||||
# Postal-address-like signals
|
||||
"postal code", "zip code", "street address",
|
||||
"mailing address", "their address"}):
|
||||
{"manage_contact"},
|
||||
# "Ask another model" intent → chat_with_model relays to a
|
||||
# different model and returns its answer. ask_teacher escalates
|
||||
@@ -384,6 +396,10 @@ class ToolIndex:
|
||||
"delegate to", "have model"}):
|
||||
{"chat_with_model", "ask_teacher", "list_models"},
|
||||
# Web search intent; the deep research intent (incl. common typo "reserach") is the next entry below.
|
||||
frozenset({"web search", "search the web", "search online", "look up",
|
||||
"google", "latest", "current", "news", "weather",
|
||||
"forecast", "stock price", "price of"}):
|
||||
{"web_search", "web_fetch"},
|
||||
frozenset({"research", "reserach", "reasearch", "look into", "investigate",
|
||||
"deep dive", "deep research", "find out about", "study up on",
|
||||
"report on", "do research", "look up everything"}):
|
||||
@@ -503,6 +519,53 @@ class ToolIndex:
|
||||
# prompts do not drag web schemas into the agent context.
|
||||
if self._WEB_RE.search(query):
|
||||
base.update({"web_search", "web_fetch"})
|
||||
# Hard steering: when the query is a clear "save info about a specific
|
||||
# person" pattern (address paste + name, phone next to a name, etc.),
|
||||
# the model has been observed defaulting to manage_memory even with
|
||||
# manage_contact in the toolset. Pull memory out for these queries so
|
||||
# the model literally cannot pick it. ALWAYS_AVAILABLE includes
|
||||
# manage_memory by default; we override that here.
|
||||
# The "for/to <word>" check needs to allow lowercase names (users
|
||||
# don't always capitalize) but filter out timing/pronoun stopwords
|
||||
# so "save this for later" / "save for tomorrow" don't trigger.
|
||||
_CONTACT_STOPWORDS_AFTER_FOR = {
|
||||
"later", "tomorrow", "yesterday", "now", "then", "today",
|
||||
"tonight", "me", "us", "you", "him", "her", "them", "myself",
|
||||
"yourself", "next", "this", "that", "the", "a", "an", "future",
|
||||
"real", "use", "uses", "another", "future", "reference",
|
||||
}
|
||||
# Regex catches "save (this|it|the|her|...|<noun>) for <name>" / "to my
|
||||
# contacts" patterns. More forgiving than literal-keyword matching —
|
||||
# 'save this address for Alex' uses one extra word between 'save' and
|
||||
# 'for' that breaks the contiguous 'save this for' phrase.
|
||||
save_for_match = re.search(
|
||||
r"\bsave\b(?:\s+\w+){0,3}\s+(?:for|to)\s+([A-Za-z]+)",
|
||||
ql,
|
||||
)
|
||||
# "to my contacts", "into my contacts", "in my address book", etc.
|
||||
to_contacts = re.search(r"\b(?:to|in|into)\s+(?:my\s+)?(?:contacts|address\s+book)\b", ql)
|
||||
# Possessive: "save (his|her|their) (address|phone|email|number) ..."
|
||||
# — strong contact signal even without "for <name>". Force-include
|
||||
# manage_contact here too since the keyword fallback misses this
|
||||
# construction.
|
||||
possessive_contact = re.search(
|
||||
r"\bsave\b(?:\s+\w+){0,2}\s+(?:his|her|their)\s+(?:address|phone|number|email|contact|details)",
|
||||
ql,
|
||||
)
|
||||
word_after = (
|
||||
save_for_match.group(1).lower() if save_for_match else None
|
||||
)
|
||||
contact_only_signal = (
|
||||
(save_for_match is not None
|
||||
and word_after is not None
|
||||
and word_after not in _CONTACT_STOPWORDS_AFTER_FOR)
|
||||
or to_contacts is not None
|
||||
or possessive_contact is not None
|
||||
)
|
||||
if possessive_contact is not None:
|
||||
base.add("manage_contact")
|
||||
if contact_only_signal and "manage_contact" in base:
|
||||
base.discard("manage_memory")
|
||||
return base
|
||||
|
||||
|
||||
|
||||