#!/bin/sh
# Entrypoint that fixes the #1 self-host footgun: a Docker container
# that runs as root writes root-owned files into bind-mounted host
# volumes, and the host user (or a non-root service user) then can't
# update them — silently breaking skill extraction, prefs saves, mail
# attachments, etc.
#
# Standard PUID/PGID pattern: pick the UID/GID we should drop to,
# chown the writable bind-mounts so existing root-owned content gets
# repaired on every start (idempotent), then exec the real command
# as that user via gosu.
#
# Environment:
#   PUID                         numeric UID to drop to (default 1000)
#   PGID                         numeric GID to drop to (default 1000)
#   DOCKER_SOCK                  Docker socket path
#                                (default /var/run/docker.sock)
#   VLLM_USE_FLASHINFER_SAMPLER  kept if already set, otherwise 0
# Arguments:
#   "$@" is the real command; it is exec'd as the target user.
set -eu

PUID="${PUID:-1000}"
PGID="${PGID:-1000}"

# Abort with a clear message unless $2 is a non-empty decimal number.
# $1 is the variable name, used only in the error message. Without this
# a typo such as PUID=app makes getent look up by *name* and find -uid
# fail silently, so the container would start with ownership unrepaired.
require_numeric_id() {
  case "$2" in
    '' | *[!0-9]*)
      printf 'entrypoint: %s must be a numeric id, got "%s"\n' "$1" "$2" >&2
      exit 1
      ;;
  esac
}
require_numeric_id PUID "$PUID"
require_numeric_id PGID "$PGID"

# Reuse an existing matching group/user if the host's UID/GID already
# corresponds to one in /etc/passwd (e.g. when the image is rebuilt
# and "odysseus" already exists at the same id). Otherwise create.
#
# If the *name* "odysseus" is already taken by a different id, a plain
# groupadd/useradd would fail and `set -e` would abort startup, so
# renumber the existing entry instead.
if ! getent group "$PGID" >/dev/null 2>&1; then
  if getent group odysseus >/dev/null 2>&1; then
    groupmod -g "$PGID" odysseus
  else
    groupadd -g "$PGID" odysseus
  fi
fi
if ! getent passwd "$PUID" >/dev/null 2>&1; then
  if getent passwd odysseus >/dev/null 2>&1; then
    usermod -u "$PUID" -g "$PGID" odysseus
  else
    useradd -u "$PUID" -g "$PGID" -M -s /bin/sh -d /app odysseus
  fi
fi

# Login name that owns PUID. It may be a pre-existing account rather
# than "odysseus"; it is left empty if the lookup yields nothing.
ODY_NAME="$(getent passwd "$PUID" | cut -d: -f1)"

# Docker-socket group plumbing. When /var/run/docker.sock is bind-
# mounted (cookbook uses `docker exec` to reach sibling containers
# like ollama-rocm), the socket is owned by root:docker on the host.
# We need the in-container odysseus user to be in the matching group
# so `gosu PUID:PGID` doesn't strip it. compose's `group_add` only
# applies to the initial root process — gosu drop resets supplementary
# groups — so detect the socket's GID here and add the user via the
# username form `gosu odysseus` below.
DOCKER_SOCK="${DOCKER_SOCK:-/var/run/docker.sock}"
if [ -S "$DOCKER_SOCK" ]; then
  SOCK_GID="$(stat -c '%g' "$DOCKER_SOCK" 2>/dev/null || echo '')"
  # GID 0 is skipped: the user is never added to the root group.
  if [ -n "$SOCK_GID" ] && [ "$SOCK_GID" != "0" ]; then
    # Create the group locally if missing, then add the user to it.
    # Best-effort: a failure here must not prevent the app from starting.
    if ! getent group "$SOCK_GID" >/dev/null 2>&1; then
      groupadd -g "$SOCK_GID" docker_host || true
    fi
    SOCK_GROUP="$(getent group "$SOCK_GID" | cut -d: -f1)"
    if [ -n "$SOCK_GROUP" ]; then
      usermod -aG "$SOCK_GROUP" "${ODY_NAME:-odysseus}" 2>/dev/null || true
    fi
  fi
fi

# Repair ownership on every writable path the app touches at runtime.
#
# Bind-mounted dirs (/app/data, /app/logs) are the obvious ones, but
# the app ALSO writes inside the image's own source tree at runtime:
#   - services/cache/{search,content}/*   (search cache LRU)
#   - services/search_analytics.json
#   - services/search_engine_error.log
#   - services/tts cache, etc.
# These dirs were created as root during `docker build`, so dropping
# to PUID:PGID would otherwise crash on the first import that tries
# to mkdir them.
#
# A single walk of /app is enough: find descends into the bind mounts
# below it, so /app/data and /app/logs need no separate pass. The
# `-not -uid` filter keeps the chown calls limited to files that need
# fixing (the tree is still traversed), which makes the step idempotent.
# `chown -h` changes a symlink itself instead of following it, so a link
# inside a volume cannot redirect this root-run chown to its target.
# Errors are deliberately ignored (e.g. read-only mounts).
if [ -d /app ]; then
  find /app -not -uid "$PUID" -print0 2>/dev/null \
    | xargs -0 -r chown -h "$PUID:$PGID" 2>/dev/null || true
fi

# Cookbook installs vllm/etc. via `pip install --user`, which pulls
# nvidia-cuda-* wheels into /app/.local but does not set CUDA_HOME or
# symlink /usr/local/cuda. vllm 0.22+ then crashes during engine init
# when FlashInfer tries to JIT a sampler kernel ("Could not find nvcc",
# then "CUDA compiler and toolkit headers are incompatible" on the
# mixed cuda-nvcc 13.3 / cuda-runtime 13.0 wheel combo).
#
# Auto-set CUDA_HOME if a pip-installed nvcc is present, and disable the
# FlashInfer JIT sampler — sampler only, no impact on attention path.
# No-op when vllm isn't installed.
#
# Checked layouts (all are real pip-wheel install paths):
#   nvidia/cu13      — nvidia-nvcc-cu13       (CUDA 13.x wheel style)
#   nvidia/cu12      — nvidia-nvcc-cu12       (CUDA 12.x wheel style)
#   nvidia/cuda_nvcc — nvidia-cuda-nvcc-cu12  (older cu12 sub-package style)
#
# An unmatched glob stays literal and simply fails the -x test below.
for cu in \
  /app/.local/lib/python*/site-packages/nvidia/cu13 \
  /app/.local/lib/python*/site-packages/nvidia/cu12 \
  /app/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do
  if [ -x "$cu/bin/nvcc" ]; then
    export CUDA_HOME="$cu"
    break
  fi
done

# Disable the FlashInfer JIT sampler by default (an explicit value from
# the environment wins) — it is sampler-only and has no impact on the
# attention path, but requires nvcc + matching CUDA headers at startup.
# Without this, vLLM crashes with "Could not find nvcc" even when the
# GPU itself is fully visible to the container.
export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}"

# Make Cookbook-installed Python CLIs visible after `pip install --user`.
# vLLM and helper scripts land here because /app is the non-root user's HOME.
export PATH="/app/.local/bin:$PATH"

# Use the username form (no :GID) so supplementary groups from /etc/group
# (including the docker-socket group set above) flow through to the child.
# Fall back to the numeric UID:GID pair only if no passwd entry was found.
ODY_USER="${ODY_NAME:-$PUID:$PGID}"

# Run first-time setup as the app user so data/ files get the right ownership.
# setup.py is idempotent — skips auth.json / .env if they already exist.
# || true so a setup failure never prevents the container from starting.
gosu "$ODY_USER" python /app/setup.py || true

# Drop root and run the actual app. `gosu` is preferred over `su` /
# `sudo` because it cleans up the process tree (no extra shell layer)
# so signals (SIGTERM from `docker stop`) reach uvicorn directly.
exec gosu "$ODY_USER" "$@"