Improve Cookbook serve diagnostics and recommendations

2026-06-17 02:05:22 -04:00 · 2026-06-02 12:15:41 +09:00
parent bdc99d746a
commit 966b53df77
14 changed files with 1113 additions and 191 deletions
@@ -962,13 +962,23 @@ def setup_cookbook_routes() -> APIRouter:
                # failed CUDA attempt) doesn't cause the next configure to reuse
                # stale settings and silently produce a CPU-only binary.
                runner_lines.append('    cd ~/llama.cpp && rm -rf build')
+                runner_lines.append('    _ody_has_cuda_runtime=0')
                runner_lines.append('    if command -v nvcc &>/dev/null; then')
+                runner_lines.append('      for _cudalib in "${CUDA_HOME:-}/lib64"/libcudart.so* "${CUDA_HOME:-}/lib"/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib*/libcudart.so*; do')
+                runner_lines.append('        [ -e "$_cudalib" ] && _ody_has_cuda_runtime=1 && break')
+                runner_lines.append('      done')
+                runner_lines.append('    fi')
+                runner_lines.append('    if command -v nvcc &>/dev/null && [ "$_ody_has_cuda_runtime" = "1" ]; then')
                runner_lines.append('      echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
                runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\')
                runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\')
                runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
                runner_lines.append('    else')
-                runner_lines.append('      echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
+                runner_lines.append('      if command -v nvcc &>/dev/null; then')
+                runner_lines.append('        echo "[odysseus] WARNING: nvcc found but CUDA runtime library was not found — building llama-server for CPU only."')
+                runner_lines.append('      else')
+                runner_lines.append('        echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
+                runner_lines.append('      fi')
                runner_lines.append('      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
                runner_lines.append('      echo "[odysseus]   To get a GPU build, first install vLLM via Cookbook -> Dependencies"')
                runner_lines.append('      echo "[odysseus]   (its CUDA wheels include nvcc), then re-launch this serve task."')
@@ -982,6 +992,10 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('    echo "llama-server build failed — installing Python bindings as fallback..."')
                runner_lines.append(f"    {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true")
                runner_lines.append('  fi')
+                runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
+                runner_lines.append('    echo "ERROR: llama.cpp serving is not available after install/build attempts."')
+                runner_lines.append('    ODYSSEUS_PREFLIGHT_EXIT=127')
+                runner_lines.append('  fi')
                runner_lines.append('fi')
            elif "ollama" in req.cmd:
                handled_ollama_serve = True
@@ -1037,19 +1051,24 @@ def setup_cookbook_routes() -> APIRouter:
                # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
                runner_lines.append('if ! command -v vllm &>/dev/null; then')
-                runner_lines.append('  echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."')
+                runner_lines.append('  echo "ERROR: vLLM is not installed."')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')
            elif "sglang.launch_server" in req.cmd:
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-                runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then')
-                runner_lines.append('  echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."')
+                runner_lines.append('if ! command -v sglang &>/dev/null; then')
+                runner_lines.append('  echo "ERROR: SGLang is not installed."')
+                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
+                runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then')
+                runner_lines.append('  echo "ERROR: SGLang is installed but failed to import."')
+                runner_lines.append('  printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')
            elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd:
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-                runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then')
-                runner_lines.append('  echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."')
+                runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then')
+                runner_lines.append('  echo "ERROR: Diffusion serving requires PyTorch + diffusers."')
+                runner_lines.append('  printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')