mirror of
https://github.com/pewdiepie-archdaemon/odysseus.git
synced 2026-06-17 02:05:22 -04:00
Add a 'Rebuild llama.cpp' Cookbook action to force a fresh GPU build (#1787)
The serve bootstrap builds llama-server from source only when it is missing from PATH, so a host that first compiled CPU-only (no nvcc present at build time) reuses that CPU-only binary on every later serve and never gets a GPU build, even after a CUDA/ROCm toolkit is installed. There was no UI lever to force a rebuild. Adds a 'Rebuild llama.cpp' button to the Cookbook Dependencies tab. It clears the cached ~/bin/llama-server symlink and ~/llama.cpp/build directory (locally or on the selected remote server) so the next serve recompiles and picks up CUDA/HIP if a toolchain is now present. It installs and downloads nothing. - routes/cookbook_helpers.py: _llama_cpp_rebuild_cmd() (single source of truth) - routes/shell_routes.py: POST /api/cookbook/rebuild-engine (admin-only, reuses the existing SSH plumbing for remote hosts) - static/js/cookbook.js: header button + handler honoring the deps server selector - tests: cover the command shape and a clean run on a fresh HOME Motivated by #831 (RTX 4070 user stuck on a CPU-only build with no way to re-trigger the build). Co-authored-by: ghreprimand <203024559+ghreprimand@users.noreply.github.com>
This commit is contained in:
@@ -552,6 +552,27 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
|
||||
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
||||
runner_lines.append(' fi')
|
||||
|
||||
|
||||
def _llama_cpp_rebuild_cmd() -> str:
|
||||
"""Shell command that clears the Cookbook-managed llama.cpp build.
|
||||
|
||||
Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build``
|
||||
directory so the next llama.cpp serve recompiles from source, picking up a
|
||||
CUDA or HIP toolchain if one is now available. The serve bootstrap only
|
||||
builds when ``llama-server`` is missing from PATH, so without this an
|
||||
existing CPU-only build is reused forever. It deliberately installs and
|
||||
downloads nothing; the rebuild itself happens on the next serve.
|
||||
"""
|
||||
return (
|
||||
'mkdir -p "$HOME/bin" && '
|
||||
'rm -f "$HOME/bin/llama-server" && '
|
||||
'rm -rf "$HOME/llama.cpp/build" && '
|
||||
'echo "[odysseus] Cleared the cached llama.cpp build. '
|
||||
'Re-launch the serve task to rebuild llama-server from source '
|
||||
'(CUDA or HIP will be used if a toolchain is now available)."'
|
||||
)
|
||||
|
||||
|
||||
class ModelDownloadRequest(BaseModel):
|
||||
repo_id: str
|
||||
include: str | None = None # glob pattern e.g. "*Q4_K_M*"
|
||||
|
||||
@@ -1058,4 +1058,39 @@ def setup_shell_routes() -> APIRouter:
|
||||
return {"ok": True, "output": stdout.decode()[-200:]}
|
||||
return {"ok": False, "error": stderr.decode()[-300:]}
|
||||
|
||||
@router.post("/api/cookbook/rebuild-engine")
async def rebuild_engine(request: Request):
    """Clear the cached llama.cpp build so the next serve recompiles.

    Admin only — this removes the Cookbook-managed ``~/bin/llama-server``
    symlink and ``~/llama.cpp/build`` directory, locally or on the selected
    remote server. It installs and downloads nothing; the next llama.cpp
    serve rebuilds from source and picks up CUDA/HIP if a toolchain is now
    present. This is the missing "force a fresh GPU build" lever for hosts
    stuck on a CPU-only llama-server.

    Request body (JSON object):
        engine: optional, defaults to ``"llamacpp"``; any other value is
            rejected with ``{"ok": False, ...}``.
        remote_host: optional; when non-empty the command runs over SSH via
            ``_ssh_base_argv``, otherwise locally through ``bash -lc``.
        ssh_port: optional; passed through to ``_ssh_base_argv``.

    Returns:
        ``{"ok": True, "output": <last 400 chars of stdout>}`` on exit
        status 0, otherwise ``{"ok": False, "error": <message>}``.

    Raises:
        HTTPException: 400 when the body is not a JSON object or when
            ``_ssh_base_argv`` rejects the host/port with ``ValueError``.
    """
    _require_admin(request)
    from routes.cookbook_helpers import _llama_cpp_rebuild_cmd

    body = await request.json()
    if not isinstance(body, dict):
        # A JSON array/string/number has no .get(); reject it cleanly
        # instead of failing with an AttributeError (HTTP 500).
        raise HTTPException(400, "Request body must be a JSON object.")

    engine = str(body.get("engine") or "llamacpp").strip()
    if engine != "llamacpp":
        return {"ok": False, "error": f"Unsupported engine: {engine}"}

    host = str(body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")
    cmd = _llama_cpp_rebuild_cmd()
    try:
        argv = (_ssh_base_argv(host, ssh_port) + [cmd]) if host else ["bash", "-lc", cmd]
    except ValueError as e:
        raise HTTPException(400, str(e)) from e

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except OSError as e:
        # e.g. the ssh or bash executable is missing; report it in the same
        # shape as every other failure instead of surfacing a 500.
        return {"ok": False, "error": f"Could not start rebuild-engine command: {e}"}

    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=30)
    except asyncio.TimeoutError:
        # wait_for() only cancels communicate(); the child keeps running
        # unless it is killed and reaped explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # the process exited between the timeout and the kill
        await proc.wait()
        return {"ok": False, "error": "Rebuild-engine command timed out."}

    if proc.returncode == 0:
        return {"ok": True, "output": out.decode("utf-8", errors="replace")[-400:]}

    detail = err.decode("utf-8", errors="replace")[-400:]
    if not detail.strip():
        # Never hand the UI an empty error string on failure.
        detail = f"Rebuild-engine command exited with status {proc.returncode}."
    return {"ok": False, "error": detail}
|
||||
|
||||
return router
|
||||
|
||||
Reference in New Issue
Block a user