fix(devops): harden docker config defaults (#4349)

This commit is contained in:
RaresKeY
2026-06-16 06:03:43 +03:00
committed by GitHub
parent 33fe7276be
commit b5edbd3df7
10 changed files with 252 additions and 60 deletions
+4
View File
@@ -15,6 +15,10 @@ build/
# at runtime — never baked into the image. Mirrored in .gitignore.
secrets.env
secrets.env.*
secrets.env~
.secrets.env.swp
.secrets.env.swo
**/#secrets.env#
!secrets.env.example
/data/
/logs/
+2 -1
View File
@@ -113,12 +113,13 @@ app = FastAPI(
)
# ========= CORS =========
CORS_ALLOW_METHODS = ["GET", "POST", "PUT", "PATCH", "DELETE"]
allowed_origins = os.getenv("ALLOWED_ORIGINS", "http://localhost,http://127.0.0.1").split(",")
app.add_middleware(
CORSMiddleware,
allow_origins=allowed_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE"],
allow_methods=CORS_ALLOW_METHODS,
allow_headers=[
"Accept",
"Authorization",
+7
View File
@@ -60,6 +60,13 @@ services:
- ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
- ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
- ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
- ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
- ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
- ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
- ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
- DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
- GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
+7
View File
@@ -59,6 +59,13 @@ services:
- ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
- ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
- ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
- ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
- ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
- ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
- ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
- DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
- GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
+7
View File
@@ -48,6 +48,13 @@ services:
- ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
- ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
- ODYSSEUS_CHAT_UPLOAD_MAX_BYTES=${ODYSSEUS_CHAT_UPLOAD_MAX_BYTES:-10485760}
- ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_UPLOAD_MAX_BYTES:-104857600}
- ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES=${ODYSSEUS_GALLERY_TRANSFORM_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_MEMORY_IMPORT_MAX_BYTES=${ODYSSEUS_MEMORY_IMPORT_MAX_BYTES:-10485760}
- ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES=${ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES=${ODYSSEUS_EMAIL_COMPOSE_UPLOAD_MAX_BYTES:-26214400}
- ODYSSEUS_STT_MAX_AUDIO_BYTES=${ODYSSEUS_STT_MAX_AUDIO_BYTES:-26214400}
- ODYSSEUS_ICS_MAX_BYTES=${ODYSSEUS_ICS_MAX_BYTES:-10485760}
- DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
- GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
+52 -19
View File
@@ -13,6 +13,8 @@ set -e
PUID="${PUID:-1000}"
PGID="${PGID:-1000}"
GOSU_BIN="$(command -v gosu)"
PYTHON_BIN="$(command -v python)"
# Reuse an existing matching group/user if the host's UID/GID already
# corresponds to one in /etc/passwd (e.g. when the image is rebuilt
@@ -24,26 +26,57 @@ if ! getent passwd "$PUID" >/dev/null 2>&1; then
useradd -u "$PUID" -g "$PGID" -M -s /bin/sh -d /app odysseus
fi
# Repair ownership on every writable path the app touches at runtime.
#
# Bind-mounted dirs (/app/data, /app/logs) are the obvious ones, but
# the app ALSO writes inside the image's own source tree at runtime:
# - services/cache/{search,content}/* (search cache LRU)
# - services/search_analytics.json
# - services/search_engine_error.log
# - services/tts cache, etc.
# These dirs were created as root during `docker build`, so dropping
# to PUID:PGID would otherwise crash on the first import that tries
# to mkdir them. Chown the whole /app tree — fast (<1s on this size)
# and idempotent via the `-not -uid` filter so we only touch files
# that need fixing.
for dir in /app /app/data /app/logs; do
# Print the 4th field of the first /proc/self/mountinfo entry whose 5th field
# equals $1. Per proc(5), field 5 is the mount point and field 4 is the root
# of the mount within its filesystem (for a bind mount, the source path
# relative to that filesystem's root).
# Prints nothing when no entry matches; awk errors are discarded and the
# function always returns success because of the trailing `|| true`.
mount_root_for() {
awk -v target="$1" '$5 == target { print $4; exit }' /proc/self/mountinfo 2>/dev/null || true
}
# Return 0 when $1 is exactly one of the top-level paths treated as too broad
# to chown recursively; return 1 for anything else, including an empty string
# and any sub-path such as /home/user.
is_broad_mount_root() {
    case "$1" in
        / | /home | /srv | /var | /usr)
            return 0 ;;
        /opt | /tmp | /mnt | /media)
            return 0 ;;
    esac
    return 1
}
repair_tree_ownership() {
dir="$1"
if [ -d "$dir" ]; then
# `find ... -not -uid` limits the chown calls to entries that are not
# already owned by PUID. The walk itself still visits every entry, but
# terabyte-sized maildirs are spared redundant chown work on each startup.
find "$dir" -not -uid "$PUID" -print0 2>/dev/null \
find "$dir" -xdev -not -uid "$PUID" -print0 2>/dev/null \
| xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
fi
}
# Chown to $PUID:$PGID every entry under /app that is not already owned by
# $PUID. The walk stays on /app's own filesystem (-xdev) and prunes
# /app/data, /app/logs, /app/.ssh, /app/.cache and /app/.local, so those
# trees are not descended into here.
# Best-effort: stderr from find and chown is discarded and a failing pipeline
# is masked by `|| true`. Does nothing when /app is not a directory.
repair_app_tree_ownership() {
if [ -d /app ]; then
find /app -xdev \
\( -path /app/data -o -path /app/logs -o -path /app/.ssh -o -path /app/.cache -o -path /app/.local \) -prune \
-o -not -uid "$PUID" -print0 2>/dev/null \
| xargs -0 -r chown "$PUID:$PGID" 2>/dev/null || true
fi
}
# Repair ownership of the directory given as $1.
#   - Not a directory: do nothing.
#   - Its mount root (as reported by mount_root_for) is a broad host path:
#     log a notice to stderr and chown only the directory itself.
#   - Otherwise: hand the whole tree to repair_tree_ownership.
repair_bind_mount_ownership() {
    dir="$1"
    [ -d "$dir" ] || return 0

    mount_root="$(mount_root_for "$dir")"
    if is_broad_mount_root "$mount_root"; then
        echo "Skipping recursive ownership repair for $dir because it maps to broad host path $mount_root" >&2
        chown "$PUID:$PGID" "$dir" 2>/dev/null || true
    else
        repair_tree_ownership "$dir"
    fi
}
# Repair image-owned writable paths without walking into bind-mounted host
# trees, then repair the app-owned mount roots separately.
repair_app_tree_ownership
for dir in /app/data /app/logs /app/.ssh /app/.cache/huggingface /app/.local; do
repair_bind_mount_ownership "$dir"
done
# Cookbook installs vllm/etc. via `pip install --user`, which pulls
@@ -83,9 +116,9 @@ export PATH="/app/.local/bin:$PATH"
# Run first-time setup as the app user so data/ files get the right ownership.
# setup.py is idempotent — skips auth.json / .env if they already exist.
# || true so a setup failure never prevents the container from starting.
gosu "$PUID:$PGID" python /app/setup.py || true
"$GOSU_BIN" "$PUID:$PGID" "$PYTHON_BIN" /app/setup.py || true
# Drop root and run the actual app. `gosu` is preferred over `su` /
# `sudo` because it cleans up the process tree (no extra shell layer)
# so signals (SIGTERM from `docker stop`) reach uvicorn directly.
exec gosu "$PUID:$PGID" "$@"
exec "$GOSU_BIN" "$PUID:$PGID" "$@"
+9 -9
View File
@@ -119,7 +119,7 @@ Read-only checks, run from the repo root on this branch. Note the real API is
```bash
# Compute the area_cli set and confirm test_backup_cli_security.py is
# area_security. Expected: 28 files, then "security".
.venv/bin/python - <<'PY'
./venv/bin/python - <<'PY'
from pathlib import Path
from tests._taxonomy import classify_test_path
@@ -138,7 +138,7 @@ rg -n "TestClient|FastAPI|create_app|SessionLocal|sqlite|dependency_overrides" \
tests/test_*cli*.py tests/test_sessions_cli.py
# Hard-coded flat paths to the exact CLI files outside tests/. Expected: no matches.
.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
./venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
from pathlib import Path
from tests._taxonomy import classify_test_path
@@ -158,26 +158,26 @@ tokens only (plus the `tests/helpers/` directory rule), so the markers of the
## Validation for the future move PR
Run with the project venv (`.venv/bin/python`); system `python3` may miss
Run with the project venv (`./venv/bin/python`); system `python3` may miss
pinned deps. Before the move, record the baseline; after, compare:
```bash
# Selection must match the 28 files before and after the move.
.venv/bin/python tests/run_focus.py --dry-run --area cli
.venv/bin/python -m pytest -m area_cli -q
./venv/bin/python tests/run_focus.py --dry-run --area cli
./venv/bin/python -m pytest -m area_cli -q
# Moved files pass when targeted directly.
.venv/bin/python -m pytest tests/cli/ -q
./venv/bin/python -m pytest tests/cli/ -q
# Whole-suite collection still succeeds (catches import/path breakage).
.venv/bin/python -m pytest --collect-only -q
./venv/bin/python -m pytest --collect-only -q
# Taxonomy/runner infrastructure is unaffected.
.venv/bin/python -m pytest tests/test_taxonomy.py tests/test_run_focus.py -q
./venv/bin/python -m pytest tests/test_taxonomy.py tests/test_run_focus.py -q
# No stale flat-path references to the moved files. Expected: no matches
# outside tests/cli/ itself.
.venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
./venv/bin/python - <<'PY2' > /tmp/area_cli_paths.txt
from pathlib import Path
from tests._taxonomy import classify_test_path
+26 -26
View File
@@ -22,8 +22,8 @@ markers only - it moves no files and changes no test behavior. Use them to run a
focused slice:
```bash
python3 -m pytest -m area_security
python3 -m pytest -m "area_services and sub_cookbook"
./venv/bin/python -m pytest -m area_security
./venv/bin/python -m pytest -m "area_services and sub_cookbook"
```
Areas are `security`, `routes`, `services`, `cli`, `js`, `helpers`, `unit`, and
@@ -38,13 +38,13 @@ sub-area names, accepts sub-areas with or without the `sub_` prefix, and passes
extra pytest arguments after `--`:
```bash
python3 tests/run_focus.py --area security
python3 tests/run_focus.py --area services --sub-area cookbook
python3 tests/run_focus.py --sub-area sub_cookbook
python3 tests/run_focus.py --keyword taxonomy
python3 tests/run_focus.py --last-failed
python3 tests/run_focus.py --dry-run --area services --sub-area cookbook
python3 tests/run_focus.py --area services -- --maxfail=1 -q
./venv/bin/python tests/run_focus.py --area security
./venv/bin/python tests/run_focus.py --area services --sub-area cookbook
./venv/bin/python tests/run_focus.py --sub-area sub_cookbook
./venv/bin/python tests/run_focus.py --keyword taxonomy
./venv/bin/python tests/run_focus.py --last-failed
./venv/bin/python tests/run_focus.py --dry-run --area services --sub-area cookbook
./venv/bin/python tests/run_focus.py --area services -- --maxfail=1 -q
```
### Fast lane and duration visibility
@@ -61,15 +61,15 @@ so you can see where time goes. They are reporting only and do not count as a
focus selector, so `--durations` must be combined with a real selector
(`--area`, `--sub-area`, `--keyword`, `--last-failed`, or `--fast`).
Activate or otherwise use the project Python environment before running these
commands. The examples use `python3` intentionally to avoid hard-coding a local
venv path.
Use the project Python environment before running these commands. The examples
use the repo's documented `./venv/bin/python` path so they do not accidentally
fall back to system Python.
```bash
python3 tests/run_focus.py --fast
python3 tests/run_focus.py --area services --fast
python3 tests/run_focus.py --area services --durations 25
python3 tests/run_focus.py --area services --fast --durations 25 --durations-min 0.05
./venv/bin/python tests/run_focus.py --fast
./venv/bin/python tests/run_focus.py --area services --fast
./venv/bin/python tests/run_focus.py --area services --durations 25
./venv/bin/python tests/run_focus.py --area services --fast --durations 25 --durations-min 0.05
```
The `slow` marker is opt-in. Mark a test `slow` only with duration evidence
@@ -79,8 +79,8 @@ replace the full suite before merge. A `slow` mark only excludes a test from the
fast lane; the test stays runnable directly, e.g.:
```bash
python3 -m pytest tests/test_auth_config_lock_concurrency.py
python3 -m pytest -m slow
./venv/bin/python -m pytest tests/test_auth_config_lock_concurrency.py
./venv/bin/python -m pytest -m slow
```
## Order-sensitivity reporting (report-only)
@@ -93,8 +93,8 @@ ordering - the shuffle exists only inside this runner. The seed is always
printed, and pytest targets/options go after a literal `--`:
```bash
python3 tests/run_order_report.py --seed 123 -- tests/cli/ -q
python3 tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed
./venv/bin/python tests/run_order_report.py --seed 123 -- tests/cli/ -q
./venv/bin/python tests/run_order_report.py -- tests/cli/ -q # generates and prints a seed
```
The same seed reproduces the same order when the reported working directory,
@@ -108,7 +108,7 @@ A generated-seed run starts with output like:
[order-report] working directory: /path/to/odysseus
[order-report] shuffling test order with seed 284734921
[order-report] reproduce from this working directory with the same test environment:
[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
[order-report] reproduce with: /path/to/odysseus/venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
```
Run the printed command from the reported working directory to reproduce the
@@ -118,7 +118,7 @@ same fixed-seed order:
[order-report] working directory: /path/to/odysseus
[order-report] shuffling test order with seed 284734921
[order-report] reproduce from this working directory with the same test environment:
[order-report] reproduce with: /path/to/odysseus/.venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
[order-report] reproduce with: /path/to/odysseus/venv/bin/python /path/to/odysseus/tests/run_order_report.py --seed 284734921 -- tests/cli/ -q
```
Pytest output remains visible between the report header and footer. A failing
@@ -237,10 +237,10 @@ helpers:
Run validation locally before opening or approving a PR. Practical checks:
- `git diff --check` - catch whitespace and conflict-marker errors.
- `python3 -m py_compile <changed files>` - confirm changed files compile.
- Focused `pytest` on the changed test files.
- `pytest` on neighboring or order-sensitive test groups that share import
state with the changed files.
- `./venv/bin/python -m py_compile <changed files>` - confirm changed files compile.
- Focused `./venv/bin/python -m pytest` on the changed test files.
- `./venv/bin/python -m pytest` on neighboring or order-sensitive test groups
that share import state with the changed files.
- `grep` for the old boilerplate when replacing it, to confirm no stragglers
remain.
- A fresh audit worktree when changing the helpers themselves, so stale
+5 -5
View File
@@ -24,7 +24,7 @@ The goal is not only to reorganize `tests/`. The goal is for the suite to be a
reliable foundation for future development: deterministic, modular, informative,
behavior-focused, and complete enough to replace manual QA wherever practical.
Run tests with the project virtualenv interpreter (`.venv/bin/python -m pytest`).
Run tests with the project virtualenv interpreter (`./venv/bin/python -m pytest`).
The system `python3` may be missing pinned dependencies (e.g. `nh3`), which
shows up as import/collection errors that are environmental, not real failures.
@@ -172,10 +172,10 @@ Prefer tests that exercise real behavior over tests that inspect source code.
Run locally before opening or approving a refactor PR:
- `git diff --check` - whitespace and conflict-marker errors.
- `python3 -m py_compile <changed .py files>` - changed files compile.
- Focused `pytest` on the changed files (use `.venv/bin/python -m pytest`).
- `pytest` on neighboring / order-sensitive groups that share import state with
the changed files.
- `./venv/bin/python -m py_compile <changed .py files>` - changed files compile.
- Focused `./venv/bin/python -m pytest` on the changed files.
- `./venv/bin/python -m pytest` on neighboring / order-sensitive groups that
share import state with the changed files.
- When replacing boilerplate, `grep` for the old pattern to confirm no stragglers.
- When changing a helper itself, validate in a fresh worktree so stale
`__pycache__` or import state cannot mask a regression.
+133
View File
@@ -0,0 +1,133 @@
"""Static regressions for Docker/devops hardening contracts."""
import ast
import re
from pathlib import Path
import yaml
from starlette.applications import Starlette
from starlette.middleware.cors import CORSMiddleware
from starlette.responses import PlainTextResponse
from starlette.routing import Route
from starlette.testclient import TestClient
# Directory one level above the folder that contains this test module
# (presumably the repository root - TODO confirm against the repo layout).
ROOT = Path(__file__).resolve().parents[1]
# Compose files whose ``odysseus`` service environment is checked for the
# upload-limit variables.
COMPOSE_FILES = [
ROOT / "docker-compose.yml",
ROOT / "docker-compose.gpu-nvidia.yml",
ROOT / "docker-compose.gpu-amd.yml",
]
# Testing docs that are scanned for stale interpreter/command references.
TEST_DOCS = [
ROOT / "tests" / "README.md",
ROOT / "tests" / "TESTING_STANDARD.md",
ROOT / "tests" / "LAYOUT_INVENTORY.md",
]
def _compose_env_names(path: Path) -> set[str]:
    """Return the variable names in the ``odysseus`` service environment.

    Args:
        path: Compose file to parse with ``yaml.safe_load``.

    Returns:
        The text before the first ``=`` of every environment entry; an entry
        without ``=`` contributes its whole text.

    Raises:
        KeyError: If ``services.odysseus.environment`` is missing.
    """
    document = yaml.safe_load(path.read_text(encoding="utf-8"))
    entries = document["services"]["odysseus"]["environment"]
    names = set()
    for entry in entries:
        # Same result as ``entry.split("=", 1)[0]``.
        name, _, _ = entry.partition("=")
        names.add(name)
    return names
def _upload_limit_env_names() -> set[str]:
    """Collect the upload-limit env var names quoted in ``src/upload_limits.py``.

    Returns:
        Every double-quoted ``ODYSSEUS_..._BYTES`` name (upper-case letters
        and underscores only) found in the module text, plus
        ``ODYSSEUS_CHAT_UPLOAD_MAX_BYTES``, which is always included.
    """
    module_text = (ROOT / "src" / "upload_limits.py").read_text(encoding="utf-8")
    names = {
        match.group(1)
        for match in re.finditer(r'"(ODYSSEUS_[A-Z_]*BYTES)"', module_text)
    }
    names.add("ODYSSEUS_CHAT_UPLOAD_MAX_BYTES")
    return names
def _cors_allow_methods() -> list[str]:
    """Read the ``CORS_ALLOW_METHODS`` literal from ``app.py`` without importing it.

    The module is parsed with ``ast`` so the application is never executed.
    Both a plain assignment (``CORS_ALLOW_METHODS = [...]``) and an annotated
    one (``CORS_ALLOW_METHODS: list[str] = [...]``) are recognised; a bare
    annotation without a value is ignored.

    Returns:
        The literal value of the first matching top-level assignment.

    Raises:
        AssertionError: If no top-level assignment to ``CORS_ALLOW_METHODS``
            exists.
        ValueError: If the assigned expression is not a plain literal.
    """
    tree = ast.parse((ROOT / "app.py").read_text(encoding="utf-8"))
    for node in tree.body:
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # Annotated assignments have a single target and an optional value.
            targets = [node.target]
        else:
            continue
        if any(
            isinstance(target, ast.Name) and target.id == "CORS_ALLOW_METHODS"
            for target in targets
        ):
            return ast.literal_eval(node.value)
    raise AssertionError("CORS_ALLOW_METHODS not found")
def test_compose_files_forward_every_upload_limit_env_var():
    """Every compose file must declare every upload-limit env var.

    The required set comes from ``_upload_limit_env_names`` and must be
    non-empty, so a broken scan cannot make the check pass vacuously.
    """
    required = _upload_limit_env_names()
    assert required
    for compose_file in COMPOSE_FILES:
        declared = _compose_env_names(compose_file)
        assert required.issubset(declared), compose_file.name
def test_docker_entrypoint_does_not_resolve_root_commands_from_app_local_path():
    """``gosu`` and ``python`` are captured before ``/app/.local/bin`` joins PATH.

    Compares the text offsets of the capture lines, the PATH export and the
    two call sites in ``docker/entrypoint.sh``. A missing snippet surfaces as
    ``ValueError`` from ``str.index``.
    """
    entrypoint = (ROOT / "docker" / "entrypoint.sh").read_text(encoding="utf-8")
    offset_of = entrypoint.index

    path_export = offset_of('export PATH="/app/.local/bin:$PATH"')
    gosu_capture = offset_of('GOSU_BIN="$(command -v gosu)"')
    python_capture = offset_of('PYTHON_BIN="$(command -v python)"')
    setup_call = offset_of('"$GOSU_BIN" "$PUID:$PGID" "$PYTHON_BIN" /app/setup.py')
    final_exec = offset_of('exec "$GOSU_BIN" "$PUID:$PGID" "$@"')

    # Both binaries must be resolved before the PATH export, which in turn
    # must precede the first use of the captured paths.
    for capture in (gosu_capture, python_capture):
        assert capture < path_export < setup_call
    assert path_export < final_exec
def test_docker_entrypoint_ownership_repair_stays_inside_expected_mounts():
    """The entrypoint text contains the bounded ownership-repair building blocks.

    Checks for the ``-xdev`` walk of ``/app``, a ``-path`` expression for each
    separately handled directory, and the helper names and log message used
    by the broad-mount guard.
    """
    entrypoint = (ROOT / "docker" / "entrypoint.sh").read_text(encoding="utf-8")
    assert "find /app -xdev" in entrypoint

    pruned_dirs = ("/app/data", "/app/logs", "/app/.ssh", "/app/.cache", "/app/.local")
    for pruned_dir in pruned_dirs:
        assert f"-path {pruned_dir}" in entrypoint

    guard_markers = (
        "mount_root_for",
        "is_broad_mount_root",
        "Skipping recursive ownership repair",
    )
    for marker in guard_markers:
        assert marker in entrypoint
def test_dockerignore_excludes_secrets_editor_backups():
    """``.dockerignore`` lists ``secrets.env`` plus its editor backup/swap names.

    Patterns are matched as exact lines of the file. The example file must
    remain explicitly re-included.
    """
    lines = (ROOT / ".dockerignore").read_text(encoding="utf-8").splitlines()
    patterns = set(lines)
    required = {
        "secrets.env",
        "secrets.env.*",
        "secrets.env~",
        ".secrets.env.swp",
        ".secrets.env.swo",
        "**/#secrets.env#",
    }
    assert required.issubset(patterns)
    assert "!secrets.env.example" in patterns
def test_cors_allow_methods_include_patch():
    """The CORS method list declared in ``app.py`` contains ``PATCH``."""
    assert "PATCH" in _cors_allow_methods()
def test_patch_preflight_is_allowed_by_configured_cors_methods():
    """A PATCH preflight succeeds under the method list configured in ``app.py``.

    Builds a minimal Starlette app with one PATCH route, wraps it in
    ``CORSMiddleware`` using the configured methods, and sends an ``OPTIONS``
    preflight that requests ``PATCH``.
    """

    async def patched(_request):
        return PlainTextResponse("ok")

    patch_route = Route("/api/document/1", patched, methods=["PATCH"])
    app = Starlette(routes=[patch_route])
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["http://client.local"],
        allow_credentials=True,
        allow_methods=_cors_allow_methods(),
        allow_headers=["Content-Type"],
    )

    preflight_headers = {
        "Origin": "http://client.local",
        "Access-Control-Request-Method": "PATCH",
    }
    client = TestClient(app)
    response = client.options("/api/document/1", headers=preflight_headers)

    assert response.status_code == 200
def test_testing_docs_use_project_venv_for_python_validation():
    """None of the testing docs contain a stale interpreter or command snippet.

    Each file in ``TEST_DOCS`` is checked for every stale substring; the
    failure message names the document and the offending snippet.
    """
    stale_patterns = (
        "python3 -m pytest",
        "python3 -m py_compile",
        "Focused `pytest`",
        "`pytest` on neighboring",
        ".venv/bin/python",
    )
    for doc_path in TEST_DOCS:
        contents = doc_path.read_text(encoding="utf-8")
        for stale in stale_patterns:
            assert stale not in contents, f"{doc_path.name} still contains {stale!r}"