Check cudart before llama.cpp CUDA build (#1466)

This commit is contained in:
Alexandre Teixeira
2026-06-03 06:23:55 +01:00
committed by GitHub
parent b55c970ec5
commit 1c2ec288dd
2 changed files with 70 additions and 2 deletions
+23 -2
View File
@@ -558,8 +558,29 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' elif command -v nvcc &>/dev/null; then')
runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
# nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
# tooling can expose nvcc without shipping libcudart, causing cmake to fail
# mid-build with "CUDA runtime library not found". Check cudart explicitly
# via a small helper so the guard stays readable.
runner_lines.append(' _odysseus_has_cudart() {')
runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0')
runner_lines.append(' local _cuh="${CUDA_HOME:-/usr/local/cuda}"')
runner_lines.append(' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0')
runner_lines.append(' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0')
runner_lines.append(' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0')
runner_lines.append(' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0')
runner_lines.append(' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0')
runner_lines.append(' return 1')
runner_lines.append(' }')
runner_lines.append(' if _odysseus_has_cudart; then')
runner_lines.append(' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')