From ec4f91afdd2d4bbcf52ce42f8b9b5cfbda8e0d68 Mon Sep 17 00:00:00 2001 From: Dividesbyzer0 <54127744+zoomdbz@users.noreply.github.com> Date: Mon, 15 Jun 2026 02:12:18 -0400 Subject: [PATCH] fix(cookbook): normalize llama-cpp-python cache types Map llama-cpp-python --type_k/--type_v cache names to integer enum values after serve-command validation while preserving native llama-server flags. --- routes/cookbook_helpers.py | 46 ++++++++++++++++++++++++++++++++++ routes/cookbook_routes.py | 2 ++ tests/test_cookbook_helpers.py | 31 +++++++++++++++++++++++ 3 files changed, 79 insertions(+) diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index e54d6560b..78b644ea0 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -573,6 +573,36 @@ _GGUF_PRELUDE_RE = re.compile( _OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)") _OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$") _OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$") +_LLAMA_CPP_PYTHON_GGML_TYPES = { + "f32": "0", + "f16": "1", + "q4_0": "2", + "q4_1": "3", + "q5_0": "6", + "q5_1": "7", + "q8_0": "8", + "q8_1": "9", + "q2_k": "10", + "q3_k": "11", + "q4_k": "12", + "q5_k": "13", + "q6_k": "14", + "q8_k": "15", + "iq2_xxs": "16", + "iq2_xs": "17", + "iq3_xxs": "18", + "iq1_s": "19", + "iq4_nl": "20", + "iq3_s": "21", + "iq2_s": "22", + "iq4_xs": "23", + "mxfp4": "39", + "nvfp4": "40", + "q1_0": "41", +} +_LLAMA_CPP_PYTHON_TYPE_FLAG_RE = re.compile( + r"(?P<flag>--type_[kv])(?P<sep>\s+|=)(?P<quote>['\"]?)(?P<value>[A-Za-z0-9_]+)(?P=quote)" +) def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]: @@ -604,6 +634,22 @@ def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") - return f"[{host}]" if bracketed_host else host, port +def _normalize_llama_cpp_python_cache_types(cmd: str | None) -> str | None: + """Map llama.cpp KV cache type names to llama-cpp-python's integer enum.""" + if not cmd or 
"llama_cpp.server" not in cmd: + return cmd + + def repl(match: re.Match[str]) -> str: + value = match.group("value") + mapped = _LLAMA_CPP_PYTHON_GGML_TYPES.get(value.lower()) + if not mapped: + return match.group(0) + quote = match.group("quote") + return f"{match.group('flag')}{match.group('sep')}{quote}{mapped}{quote}" + + return _LLAMA_CPP_PYTHON_TYPE_FLAG_RE.sub(repl, cmd) + + def _check_serve_binary(seg: str) -> None: """Validate that a single command segment starts with an allowlisted binary (after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`).""" diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index edbba3ad7..320b17780 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -46,6 +46,7 @@ from routes.cookbook_helpers import ( _diagnose_serve_output, run_ssh_command_async, _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _normalize_llama_cpp_python_cache_types, ModelDownloadRequest, ServeRequest, ) @@ -1211,6 +1212,7 @@ def setup_cookbook_routes() -> APIRouter: # many downstream `"engine" in req.cmd` membership checks can't hit # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400). 
req.cmd = _validate_serve_cmd(req.cmd) or "" + req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or "" req.cmd = _venv_safe_local_pip_install_cmd( req.cmd, local=not bool(req.remote_host), diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py index 696b610df..1259132cd 100644 --- a/tests/test_cookbook_helpers.py +++ b/tests/test_cookbook_helpers.py @@ -2,6 +2,7 @@ import json import os import subprocess import sys +from pathlib import Path import pytest from fastapi import HTTPException @@ -21,6 +22,7 @@ from routes.cookbook_helpers import ( _safe_env_prefix, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _normalize_llama_cpp_python_cache_types, _validate_gpus, _validate_local_dir, _validate_repo_id, @@ -549,6 +551,35 @@ def test_validate_serve_cmd_accepts_windows_printf_format(): assert _validate_serve_cmd(cmd) == cmd +def test_normalize_llama_cpp_python_cache_types_for_stale_client_cmd(): + cmd = ( + "python -m llama_cpp.server --model model.gguf --host 0.0.0.0 --port 8000 " + "--type_k q4_0 --type_v q4_0" + ) + + assert _normalize_llama_cpp_python_cache_types(cmd).endswith("--type_k 2 --type_v 2") + + +def test_normalize_llama_cpp_python_cache_types_preserves_native_cache_flags(): + cmd = ( + "llama-server --model model.gguf --cache-type-k q4_0 --cache-type-v q4_0 " + "|| python3 -m llama_cpp.server --model model.gguf --type_k=q8_0 --type_v='f16'" + ) + + normalized = _normalize_llama_cpp_python_cache_types(cmd) + assert "--cache-type-k q4_0 --cache-type-v q4_0" in normalized + assert "--type_k=8" in normalized + assert "--type_v='1'" in normalized + + +def test_model_serve_normalizes_llama_cpp_python_cache_types_after_validation(): + src = (Path(__file__).resolve().parents[1] / "routes" / "cookbook_routes.py").read_text(encoding="utf-8") + + assert "req.cmd = _validate_serve_cmd(req.cmd) or \"\"" in src + assert "req.cmd = _normalize_llama_cpp_python_cache_types(req.cmd) or \"\"" in src + assert 
src.index("_validate_serve_cmd(req.cmd)") < src.index("_normalize_llama_cpp_python_cache_types(req.cmd)") + + def test_ollama_serve_defaults_to_loopback_bind(): assert _ollama_bind_from_cmd("ollama serve") == ("127.0.0.1", "11434") assert _ollama_bind_from_cmd("ollama run qwen2.5:0.5b") == ("127.0.0.1", "11434")