From f01465e87f6fcb20f5a857926b969f0b8202fa10 Mon Sep 17 00:00:00 2001
From: pewdiepie-archdaemon
Date: Fri, 19 Jun 2026 00:33:19 +0000
Subject: [PATCH] Cookbook Dependencies: per-OS+backend install command + install-system-deps endpoint

When a llama.cpp launch needs cmake/build-essential/git the user used to get
a four-distro dump ("apt: x / pacman: y / dnf: z / brew: w") and had to pick
the right one. Now:

- shell_routes /api/cookbook/packages probes /etc/os-release on the target in
  the same SSH round-trip as the existing system-prereq check, classifies into
  debian / arch / fedora / alpine / suse / macos, and builds a single
  install_cmd_for_target string from the (os_family, backend) matrix. CUDA
  hosts get nvidia-cuda-toolkit; ROCm gets rocm-dev / rocm-hip-sdk; Vulkan
  gets libvulkan-dev / vulkan-headers; etc.
- llama_cpp catalog entry gets system_prereqs: [cmake, g++, git]. When any of
  those are missing on the target, the row picks up pkg.build_deps_missing +
  pkg.install_cmd_for_target for the frontend to render.
- New POST /api/cookbook/install-system-deps endpoint runs the right package
  manager via passwordless sudo on the target. Allowlisted to {cmake,
  build-essential, g++, gcc, git, tmux, make}; sudo -n only so it can never
  hang waiting for a password (returns a clear "passwordless sudo unavailable"
  error via stderr instead).
--- routes/shell_routes.py | 307 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 304 insertions(+), 3 deletions(-) diff --git a/routes/shell_routes.py b/routes/shell_routes.py index b4e52325d..406d80bb3 100644 --- a/routes/shell_routes.py +++ b/routes/shell_routes.py @@ -961,12 +961,84 @@ def setup_shell_routes() -> APIRouter: return StreamingResponse(generate(), media_type="text/event-stream") + def _os_id_from_release(text: str) -> str: + """Map /etc/os-release contents to a canonical family for our matrix.""" + if not text: + return "" + ids = [] + for line in text.splitlines(): + line = line.strip() + if line.startswith("ID=") or line.startswith("ID_LIKE="): + ids += line.split("=", 1)[1].strip().strip('"').split() + ids = [i.lower() for i in ids] + if any(x in ids for x in ("debian", "ubuntu", "linuxmint", "pop", "elementary")): + return "debian" + if any(x in ids for x in ("arch", "manjaro", "endeavouros", "cachyos", "garuda")): + return "arch" + if any(x in ids for x in ("fedora", "rhel", "centos", "rocky", "almalinux", "ol")): + return "fedora" + if "alpine" in ids: + return "alpine" + if any(x in ids for x in ("suse", "opensuse", "opensuse-leap", "opensuse-tumbleweed", "sles")): + return "suse" + return "" + + # Matrix lookup keyed on (os_family, backend) → (pkg_mgr_cmd_template, pkg_list_per_dep). + # Each `system_prereqs` name resolves to a list of OS-specific package + # names that get joined into the final `sudo apt install -y …` etc. + # command. Backend-specific extras (CUDA toolkit, ROCm, Vulkan headers) + # are added only when the detected backend needs them. 
+ _PKG_NAMES = { + # canonical-name → {os_id: [actual_pkg_names_on_this_os]} + "cmake": {"debian": ["cmake"], "arch": ["cmake"], "fedora": ["cmake"], "alpine": ["cmake"], "suse": ["cmake"], "macos": ["cmake"]}, + "build-essential": {"debian": ["build-essential"], "arch": ["base-devel"], "fedora": ["gcc", "gcc-c++", "make"], "alpine": ["build-base"], "suse": ["gcc-c++", "make"], "macos": []}, + "g++": {"debian": ["g++"], "arch": ["gcc"], "fedora": ["gcc-c++"], "alpine": ["g++"], "suse": ["gcc-c++"], "macos": []}, + "gcc": {"debian": ["gcc"], "arch": ["gcc"], "fedora": ["gcc"], "alpine": ["gcc"], "suse": ["gcc"], "macos": []}, + "make": {"debian": ["make"], "arch": ["make"], "fedora": ["make"], "alpine": ["make"], "suse": ["make"], "macos": []}, + "git": {"debian": ["git"], "arch": ["git"], "fedora": ["git"], "alpine": ["git"], "suse": ["git"], "macos": ["git"]}, + "tmux": {"debian": ["tmux"], "arch": ["tmux"], "fedora": ["tmux"], "alpine": ["tmux"], "suse": ["tmux"], "macos": ["tmux"]}, + } + _BACKEND_EXTRAS = { + "cuda": {"debian": ["nvidia-cuda-toolkit"], "arch": ["cuda"], "fedora": ["cuda-toolkit"], "alpine": [], "suse": ["cuda"], "macos": []}, + "rocm": {"debian": ["rocm-dev"], "arch": ["rocm-hip-sdk"], "fedora": ["rocm-devel"], "alpine": [], "suse": ["rocm-dev"], "macos": []}, + "vulkan": {"debian": ["libvulkan-dev", "vulkan-tools"], "arch": ["vulkan-headers", "vulkan-tools"], "fedora": ["vulkan-headers", "vulkan-tools"], "alpine": ["vulkan-loader-dev", "vulkan-tools"], "suse": ["vulkan-devel", "vulkan-tools"], "macos": []}, + } + _PKG_MGR = { + "debian": "sudo apt install -y {pkgs}", + "arch": "sudo pacman -S --needed {pkgs}", + "fedora": "sudo dnf install -y {pkgs}", + "alpine": "sudo apk add {pkgs}", + "suse": "sudo zypper install -n {pkgs}", + "macos": "brew install {pkgs}", + } + + def _install_cmd_for_target(os_id: str, backend: str, missing: list[str]) -> str: + """Build a single OS+backend-aware install command for the missing prereqs.""" + if not os_id 
or os_id not in _PKG_MGR: + return "" + pkgs: list[str] = [] + seen: set[str] = set() + for m in missing: + for p in _PKG_NAMES.get(m, {}).get(os_id, []): + if p not in seen: + pkgs.append(p); seen.add(p) + # Add backend-specific extras only when the build would actually + # consume them (a CUDA toolkit isn't useful on a Vulkan box). + backend = (backend or "").lower() + for p in _BACKEND_EXTRAS.get(backend, {}).get(os_id, []): + if p not in seen: + pkgs.append(p); seen.add(p) + if not pkgs: + return "" + return _PKG_MGR[os_id].format(pkgs=" ".join(pkgs)) + @router.get("/api/cookbook/packages") async def list_packages( request: Request, host: str | None = None, ssh_port: str | None = None, venv: str | None = None, + backend: str | None = None, ): """Check which optional packages are installed. @@ -1015,6 +1087,12 @@ def setup_shell_routes() -> APIRouter: "kind": "system", "install_hint": "Install Docker on the selected server and allow this user to run docker.", }, + # Note: cmake / gcc / git are not separate dependency rows — + # they're declared as `system_prereqs` on llama_cpp (and any + # other engine that compiles from source) so they appear as + # an inline status note on that engine's row instead of + # cluttering the panel with raw OS package names that aren't + # meaningful product-level dependencies on their own. # ── LLM ── installs on GPU servers for model serving/downloading { "name": "hf_transfer", @@ -1029,6 +1107,13 @@ def setup_shell_routes() -> APIRouter: "desc": "Serve GGUF models via llama.cpp", "category": "LLM", "target": "remote", + # Build-toolchain prereqs. Cookbook's launch bootstrap + # compiles llama-server from source when no prebuilt + # binary is present; without these the build aborts + # with `cmake: command not found`. Surfaced inline on + # this row so the user doesn't have to chase three + # separate OS-package rows. 
+ "system_prereqs": ["cmake", "g++", "git"], }, { "name": "sglang", @@ -1143,14 +1228,28 @@ def setup_shell_routes() -> APIRouter: raise HTTPException(400, str(e)) except Exception: remote_status = {} - if host and remote_system_names: + # Union of system_names + every package's system_prereqs. Probing + # the prereqs alongside the main system deps in a single SSH call + # avoids a second round-trip per Cookbook → Dependencies refresh. + prereq_names: set[str] = set() + for p in packages: + for pr in p.get("system_prereqs") or []: + prereq_names.add(str(pr)) + all_system_names = list(set(remote_system_names) | prereq_names) + # Detect the target's OS family + read /etc/os-release in the same + # SSH round-trip as the prereq probe — used downstream to render a + # single OS-specific install command per row instead of dumping + # every distro's syntax onto the user. + target_os_id: str = "" + if host and all_system_names: try: checks = [] - for name in remote_system_names: + for name in all_system_names: qn = shlex.quote(name) checks.append( f"if command -v {qn} >/dev/null 2>&1; then echo {qn}=1; else echo {qn}=0; fi" ) + checks.append("echo '---OSREL---'; cat /etc/os-release 2>/dev/null || true") inner = " ; ".join(checks) argv = _ssh_base_argv(host, ssh_port) + [inner] proc = await asyncio.create_subprocess_exec( @@ -1160,14 +1259,32 @@ def setup_shell_routes() -> APIRouter: ) out, _err = await asyncio.wait_for(proc.communicate(), timeout=12) txt = out.decode("utf-8", errors="replace").strip() + _section, _osrel_lines = "probe", [] for line in txt.splitlines(): + if line.strip() == "---OSREL---": + _section = "osrel"; continue + if _section == "osrel": + _osrel_lines.append(line) + continue name, sep, value = line.strip().partition("=") - if sep and name in remote_system_names: + if sep and name in all_system_names: remote_status[name] = value == "1" + target_os_id = _os_id_from_release("\n".join(_osrel_lines)) except ValueError as e: raise HTTPException(400, 
str(e)) except Exception: pass + elif not host: + # Local target — probe in-process so the inline install command + # still appears in the dep panel when the cookbook container + # itself is the selected server. + try: + with open("/etc/os-release", encoding="utf-8") as f: + target_os_id = _os_id_from_release(f.read()) + except Exception: + target_os_id = "" + if sys.platform == "darwin": + target_os_id = "macos" for pkg in packages: on_remote = bool(host and pkg.get("target") == "remote") @@ -1229,6 +1346,94 @@ def setup_shell_routes() -> APIRouter: # 500 the entire packages panel; report it as not usable. pkg["installed"] = False + # llama_cpp partial-state probe: when the package is installed + # but the wheel was built CPU-only AND the target has NVIDIA + # hardware, mark the row as partial (yellow/orange) with a + # one-click upgrade to the CUDA wheel. Without this the row + # reads "ready" green while inference runs at 3 tok/s on GPU + # silicon — actively misleading. + if pkg["name"] == "llama_cpp" and pkg.get("installed"): + _gpu_capable = False + _has_nvidia_target = False + if on_remote and host: + try: + # Activate the configured venv FIRST so the probe + # runs against the same python the launch script + # would activate. Without this prefix, bare + # `python3` was checked — which can disagree with + # the venv's wheel (e.g. user-site has CUDA wheel + # but venv has CPU-only), and the dep panel then + # showed "ready" green while every launch fell to + # CPU. 
+ _vp = _venv_activate_prefix(venv) + probe = ( + f'{_vp}python3 -c "import llama_cpp; import sys; ' + 'sys.exit(0 if llama_cpp.llama_supports_gpu_offload() else 1)" ' + '&& echo llama_cpp_gpu=1 || echo llama_cpp_gpu=0; ' + 'command -v nvidia-smi >/dev/null 2>&1 ' + '&& nvidia-smi -L 2>/dev/null | grep -q "GPU " ' + '&& echo nvidia=1 || echo nvidia=0' + ) + argv = _ssh_base_argv(host, ssh_port) + [probe] + proc = await asyncio.create_subprocess_exec( + *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, + ) + out, _ = await asyncio.wait_for(proc.communicate(), timeout=8) + txt = out.decode("utf-8", errors="replace") + if "llama_cpp_gpu=1" in txt: + _gpu_capable = True + if "nvidia=1" in txt: + _has_nvidia_target = True + except Exception: + pass + else: + try: + import llama_cpp as _lcp # type: ignore + _gpu_capable = bool(_lcp.llama_supports_gpu_offload()) + except Exception: + _gpu_capable = False + _has_nvidia_target = shutil.which("nvidia-smi") is not None + if (not _gpu_capable) and _has_nvidia_target: + pkg["partial"] = True + pkg["partial_reason"] = "Installed but CPU-only wheel — GPU detected on this target. Upgrade to a CUDA wheel for ~10× faster inference." + pkg["partial_action"] = "reinstall_llama_cpp_cuda" + # Attach per-package system_prereqs status. We probed each + # prereq name above; surface "Missing build deps: …" ONLY + # when the package itself is not installed — if the package + # works (e.g. llama-cpp-python already imports cleanly), the + # build toolchain is irrelevant and surfacing it as a red + # flag confuses users ("ready" + "missing" on the same row). 
+ _prereqs = list(pkg.get("system_prereqs") or []) + if _prereqs: + if on_remote: + _pr_present = {n: bool(remote_status.get(n)) for n in _prereqs} + else: + _pr_present = {n: shutil.which(n) is not None for n in _prereqs} + pkg["system_prereqs_status"] = _pr_present + _missing = [n for n, ok in _pr_present.items() if not ok] + # Suppress the "missing build deps" hint when the package + # itself is installed — build deps are only relevant if + # the user would need to recompile from source. + if pkg.get("installed"): + _missing = [] + if _missing: + # Build a target-specific install command from the + # (os_family, backend) matrix when we know both. Fall + # back to the multi-distro hint only when the target's + # OS can't be classified (e.g. ssh probe failed). + _resolved_os = target_os_id or "debian" # safest default + _cmd = _install_cmd_for_target(_resolved_os, backend or "", _missing) + if _cmd and target_os_id: + _hint = "Missing build deps for this target: " + ", ".join(_missing) + pkg["install_cmd_for_target"] = _cmd + pkg["install_cmd_os"] = target_os_id + pkg["install_cmd_backend"] = (backend or "").lower() + else: + _hint = "Missing build deps: " + ", ".join(_missing) + ". Install via apt: cmake build-essential git / pacman: cmake base-devel git / dnf: cmake gcc-c++ make git / brew: cmake git." 
# Package managers the install route can drive, in probe order (native
# managers first so a Linuxbrew install never shadows them). Each entry is
# (binary probed with `command -v`, family key into _PKG_NAMES,
#  whether the commands need root, command templates run in sequence).
_SYSTEM_DEPS_MANAGERS = (
    ("apt-get", "debian", True, (
        "env DEBIAN_FRONTEND=noninteractive apt-get update -qq",
        "env DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends {pkgs}",
    )),
    ("pacman", "arch", True, ("pacman -Sy --needed --noconfirm {pkgs}",)),
    ("dnf", "fedora", True, ("dnf install -y {pkgs}",)),
    ("zypper", "suse", True, ("zypper --non-interactive install {pkgs}",)),
    ("apk", "alpine", True, ("apk add --no-cache {pkgs}",)),
    ("brew", "macos", False, ("brew install {pkgs}",)),
)


def _system_deps_script(requested: list[str]) -> str:
    """Return a POSIX shell snippet that installs ``requested`` on the target.

    The snippet probes for a supported package manager and runs the first
    one found. Canonical names are translated through ``_PKG_NAMES`` so the
    route and the displayed install command agree on per-OS package names
    (e.g. ``g++`` is ``gcc`` on Arch). Failure reasons are echoed to stderr
    with distinct exit codes: 2 = root unavailable, 3 = no supported package
    manager, 4 = nothing installable via the detected manager.
    """
    # as_root: run directly when already root (sudo is often absent in
    # containers), otherwise via non-interactive sudo so it can never hang
    # on a password prompt. Shell functions are used instead of a $SUDO
    # variable so the snippet does not rely on word-splitting.
    prelude = (
        "set -e; "
        'as_root() { if [ "$(id -u)" = "0" ]; then "$@"; else sudo -n "$@"; fi; }; '
        'require_root() { if [ "$(id -u)" != "0" ] && ! sudo -n true 2>/dev/null; then '
        'echo "ERROR: passwordless sudo unavailable on this target. Run once: $1 '
        '-- after that, Cookbook can install the rest." >&2; exit 2; fi; }; '
    )

    branches: list[str] = []
    for probe, family, needs_root, templates in _SYSTEM_DEPS_MANAGERS:
        names = list(dict.fromkeys(
            pkg
            for name in requested
            for pkg in _PKG_NAMES.get(name, {}).get(family, [])
        ))
        if not names:
            # e.g. only compiler names requested on a brew target: running
            # the manager with no arguments would just fail with a usage
            # error, so report the real reason instead.
            action = (
                f'echo "ERROR: none of the requested packages are installable via {probe} '
                'on this target." >&2; exit 4'
            )
        else:
            pkgs = " ".join(shlex.quote(n) for n in names)
            cmds = [t.format(pkgs=pkgs) for t in templates]
            if needs_root:
                # The root check lives inside the branch so managers that
                # need no root (brew) are not blocked by a missing sudo.
                hint = shlex.quote("sudo " + cmds[-1])
                action = f"require_root {hint}; " + " && ".join(f"as_root {c}" for c in cmds)
            else:
                action = " && ".join(cmds)
        branches.append(f"command -v {probe} >/dev/null 2>&1; then {action}; ")

    return (
        prelude
        + "if " + "elif ".join(branches)
        + 'else echo "ERROR: no supported package manager '
        '(apt/pacman/dnf/zypper/apk/brew) on this target." >&2; exit 3; fi'
    )


@router.post("/api/cookbook/install-system-deps")
async def install_system_deps(request: Request):
    """Install OS-level system packages (cmake/build-essential/git/tmux)
    on a remote target or on the local machine. Admin only.

    Request JSON: ``packages`` (list of canonical names), optional
    ``remote_host`` and ``ssh_port``. Names outside the allowlist are
    dropped, so the route can't be coerced into installing arbitrary OS
    packages. Privileged commands run directly as root or through
    ``sudo -n``; when neither is possible the script reports that on stderr
    instead of hanging on a password prompt.

    Returns:
        ``{"ok", "exit_code", "output", "error"}`` after the command ran, or
        ``{"ok": False, "error": ...}`` when nothing was requested, the
        command could not be started, or it timed out.

    Raises:
        HTTPException: 400 when ``_ssh_base_argv`` rejects the host/port.
    """
    _require_admin(request)
    body = await request.json()
    if not isinstance(body, dict):
        # A JSON array/scalar body would otherwise blow up on .get().
        body = {}
    raw = body.get("packages") or []
    if not isinstance(raw, (list, tuple)):
        raw = []
    host = str(body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")

    # Names users can request — must match canonical names used in the
    # deps catalog's `system_prereqs` field and on the System rows.
    ALLOWED = {"cmake", "build-essential", "g++", "gcc", "git", "tmux", "make"}
    pkgs = list(dict.fromkeys(
        name for name in (str(p).strip() for p in raw) if name in ALLOWED
    ))
    if not pkgs:
        return {"ok": False, "error": "no installable packages requested (allowlist: " + ", ".join(sorted(ALLOWED)) + ")"}

    script = _system_deps_script(pkgs)
    try:
        if host:
            argv = _ssh_base_argv(host, ssh_port) + [script]
        else:
            argv = ["bash", "-lc", script]
    except ValueError as e:
        raise HTTPException(400, str(e)) from e

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except OSError as e:
        # ssh/bash missing or not executable on this host.
        return {"ok": False, "error": f"could not start installer: {e}"}

    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=180)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); without an explicit kill the
        # child keeps running and is never reaped.
        # NOTE(review): for a remote host this stops the local ssh client;
        # whether the remote package manager also stops is not guaranteed.
        try:
            proc.kill()
        except ProcessLookupError:
            pass
        await proc.wait()
        return {"ok": False, "error": "Install timed out after 180s"}

    ok = proc.returncode == 0
    err_txt = err.decode("utf-8", errors="replace").strip()
    out_txt = out.decode("utf-8", errors="replace").strip()
    # On failure prefer stderr, then the tail of stdout (some package
    # managers print failure context there), then the bare exit code, so the
    # frontend never receives an empty error string.
    combined = None if ok else (err_txt or out_txt[-500:] or f"exit code {proc.returncode}")
    return {
        "ok": ok,
        "exit_code": proc.returncode,
        "output": out_txt[-1000:],
        "error": combined,
    }