fix(cookbook): install llama-cpp-python[server] so llama.cpp serving works (#730) (#1338)

The llama.cpp serve auto-install built a bare `llama-cpp-python` in the Linux
source-build fallback and the Termux path, but the serve command runs
`python3 -m llama_cpp.server`, which needs the `[server]` extra. Because the
"already installed?" guard only checks `import llama_cpp` (a bare install
satisfies it), the missing extra was never added, so serving crashed with
`ModuleNotFoundError: No module named 'starlette_context'` (issue #730).

- Request the `[server]` extra in both the Termux direct install and the Linux
  Python-bindings fallback (the Windows path already used `[server]`).
- Shell-quote the package spec in `_pip_install_fallback_chain` via `shlex.quote`
  so the `[server]` brackets aren't treated as a bash glob; plain names unaffected.

Tests: tests/test_cookbook_helpers.py gains extras-quoting coverage and a
serve-runner regression guard.
This commit is contained in:
Shaw
2026-06-03 01:24:26 -04:00
committed by GitHub
parent 552bc15067
commit b10e6bc870
3 changed files with 41 additions and 4 deletions
+2 -2
View File
@@ -1039,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' pkg install -y cmake 2>/dev/null')
runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null')
runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true')
runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true')
runner_lines.append(' fi')
runner_lines.append('elif ! command -v llama-server &>/dev/null; then')
runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."')
@@ -1066,7 +1066,7 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' # If the native build failed, fall back to the Python bindings.')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true")
runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true")
runner_lines.append(' fi')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."')